-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathsigs.py
More file actions
executable file
·304 lines (266 loc) · 10.6 KB
/
sigs.py
File metadata and controls
executable file
·304 lines (266 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
#!/usr/bin/env python3
"""Calculate MD5, SHA1, SHA256, SHA512, SHA3-224, and SHA3-384 hashes of files."""
#
# Rewrite of my perl sigs script in python.
# Calculate hashes of files
#
# Author: Jim Clausing
# Date: 2026-03-17
# Version: 1.9.1
from __future__ import print_function
import sys
import os
import argparse
import hashlib
import base64
import contextlib
import codecs
import signal
from collections import namedtuple
if sys.version_info < (3, 6):
import sha3 # pylint: disable=import-error,unused-import
__version_info__ = (1, 9, 1)
__version__ = ".".join(str(part) for part in __version_info__)

# HashSpec is the one record type describing a supported hash algorithm;
# every per-algorithm detail the script needs lives in one of these fields.
HashSpec = namedtuple(
    "HashSpec",
    [
        "arg_attr",       # attribute name of the matching switch on `args`
        "psv_header",     # column title used in the PSV header line
        "verbose_label",  # prefix printed before the digest in verbose output
        "factory",        # callable returning a fresh hash object
        "format_fn",      # callable turning a hash object into its printed digest
        "digest_len",     # length of the formatted (hex or base64) digest string,
                          # used by check_hashes() to infer the algorithm
    ],
)
def _hex(h):
    """Format the digest of hash object *h* as a hexadecimal string."""
    hex_text = h.hexdigest()
    return hex_text
def _b64(h):
    """Format the digest of hash object *h* as base64 text (used for SHA512)."""
    raw_digest = h.digest()
    encoded = base64.b64encode(raw_digest)
    # b64encode returns bytes; the callers print and compare str values.
    return encoded.decode("utf-8")
# Table of supported algorithms. selected_specs() walks it in this order, so
# the order here is also the order of PSV columns and verbose output lines.
HASH_SPECS = (
    HashSpec(
        arg_attr="md5",
        psv_header="md5",
        verbose_label=" MD5: ",
        factory=hashlib.md5,
        format_fn=_hex,
        digest_len=32,
    ),
    HashSpec(
        arg_attr="sha1",
        psv_header="sha1",
        verbose_label=" SHA1: ",
        factory=hashlib.sha1,
        format_fn=_hex,
        digest_len=40,
    ),
    HashSpec(
        arg_attr="sha256",
        psv_header="sha256",
        verbose_label=" SHA256: ",
        factory=hashlib.sha256,
        format_fn=_hex,
        digest_len=64,
    ),
    # SHA512 is the only entry printed as base64, hence the 88-character length.
    HashSpec(
        arg_attr="sha512",
        psv_header="sha512",
        verbose_label=" SHA512: ",
        factory=hashlib.sha512,
        format_fn=_b64,
        digest_len=88,
    ),
    HashSpec(
        arg_attr="sha3_224",
        psv_header="sha3-224",
        verbose_label=" SHA3-224: ",
        factory=hashlib.sha3_224,
        format_fn=_hex,
        digest_len=56,
    ),
    # The "sha3" switch means SHA3-384 specifically.
    HashSpec(
        arg_attr="sha3",
        psv_header="sha3-384",
        verbose_label=" SHA3-384: ",
        factory=hashlib.sha3_384,
        format_fn=_hex,
        digest_len=96,
    ),
)

# Parsed command-line namespace; assigned by the __main__ section and read
# as a global by the functions below.
args = None  # pylint: disable=invalid-name
@contextlib.contextmanager
def smart_open(filepath=None):
    """Yield a binary read handle for *filepath*, or stdin's byte stream.

    An empty/None path or the conventional "-" selects ``sys.stdin.buffer``,
    which is handed out as-is and never closed here.  Any other path is
    opened in "rb" mode and closed when the ``with`` body finishes, whether
    it finishes normally or by raising.
    """
    wants_stdin = not filepath or filepath == "-"
    if wants_stdin:
        # stdin belongs to the process; leave it open for later readers.
        yield sys.stdin.buffer
        return
    with open(filepath, "rb") as stream:
        yield stream
def selected_specs():
    """Return the HASH_SPECS entries enabled by the current ``args``.

    An entry is enabled when its own switch is set on ``args`` or when
    ``args.all`` is set.  The result is a new list in HASH_SPECS order.
    """
    chosen = []
    for spec in HASH_SPECS:
        if getattr(args, spec.arg_attr) or args.all:
            chosen.append(spec)
    return chosen
def print_header():
    """Write the PSV header row: one column per selected hash, then "filename"."""
    columns = [spec.psv_header for spec in selected_specs()]
    columns.append("filename")
    print("|".join(columns))
    sys.stdout.flush()
def hash_file(fname):  # pylint: disable=redefined-outer-name
    """Feed the contents of *fname* (or stdin for "-") to every selected hash.

    Returns a dict mapping each selected spec's ``arg_attr`` to its updated
    hash object, or None when opening or reading raises an OS-level error.

    For a named file, ``os.stat()`` is sampled before and after the read; if
    the size or nanosecond mtime differs, a warning goes to stderr (the
    hashes are still returned).  Failure of either stat call is ignored.
    """
    hashes = {}
    for spec in selected_specs():
        hashes[spec.arg_attr] = spec.factory()

    # Only a real path can be stat()ed; stdin has no "before" snapshot.
    stat_before = None
    reads_named_file = bool(fname) and fname != "-"
    if reads_named_file:
        try:
            stat_before = os.stat(fname)
        except OSError:
            # Best effort: without a snapshot the change check is skipped.
            stat_before = None

    try:
        with smart_open(fname) as f:
            while True:
                block = f.read(args.block)
                if block == b"":
                    break
                for h in hashes.values():
                    h.update(block)
    except OSError:
        # IOError is an alias of OSError and PermissionError a subclass of it.
        return None

    if stat_before is not None:
        try:
            stat_after = os.stat(fname)
            size_changed = stat_before.st_size != stat_after.st_size
            if size_changed or stat_before.st_mtime_ns != stat_after.st_mtime_ns:
                warning = (
                    f"{sys.argv[0]}: {fname}: file changed during hashing "
                    f"(size {stat_before.st_size} -> {stat_after.st_size}, "
                    f"mtime {stat_before.st_mtime_ns} -> {stat_after.st_mtime_ns})"
                )
                print(warning, file=sys.stderr)
        except OSError:
            # The file may have vanished after reading; the digests stand.
            pass
    return hashes
def print_hashes(fname, hashes):  # pylint: disable=redefined-outer-name
    """Print the digests computed for *fname* in the selected output format.

    *hashes* is the dict returned by hash_file(), or None when hashing
    failed; in that case "(Permission Problem)" stands in for every digest.

    Layout, in order of precedence:
      * ``args.psv`` set: one pipe-separated row, digests then the filename.
        This is checked first so the rows always match the PSV header, even
        when only a single hash is selected.
      * exactly one hash selected: a single "digest filename" line, with the
        filename left empty when reading stdin ("-").
      * otherwise: a "filename:" line (omitted for stdin) followed by one
        labelled line per hash.
    """
    specs = selected_specs()
    if hashes is None:
        digests = ["(Permission Problem)"] * len(specs)
    else:
        digests = [spec.format_fn(hashes[spec.arg_attr]) for spec in specs]

    if args.psv:
        print("|".join(digests + [fname]))
    elif len(specs) == 1:
        suffix = fname if fname != "-" else ""
        print(f"{digests[0]} {suffix}")
    else:
        if fname != "-":
            print(fname + ":")
        for spec, text in zip(specs, digests):
            print(spec.verbose_label + text)
    sys.stdout.flush()
def count_hashes():
    """Count the hash algorithms currently enabled through ``args``."""
    return sum(1 for _ in selected_specs())
def _split_check_line(text):
    """Split one decoded hash-file line into ``(digest, filename)``.

    The digest is everything before the first space.  The filename is the
    remainder with the separating spaces removed, so names containing spaces
    are kept whole and both one-space and two-space separators are accepted.
    Trailing CR/LF is dropped first.  Returns None when no filename is left.
    """
    digest, _, remainder = text.rstrip("\r\n").partition(" ")
    filename = remainder.lstrip(" ")
    if not filename.strip():
        return None
    return digest, filename


def check_hashes():
    """Read hash-file(s) from args.files and verify each listed file.

    Each usable line is "<digest> <filename>".  The algorithm is inferred
    from the digest length via HASH_SPECS; lines whose first field matches no
    known length are ignored.  Lengths are unambiguous within the set this
    script supports; SHA3-256/SHA3-512 are not supported, so SHA3-224 (56)
    and SHA3-384 (96) are the only SHA3 variants in play.

    Prints "<file>: OK" or "<file>: FAILED" per entry.  A listed file that
    does not exist counts as a failure.  A hash-file that is missing, not a
    regular file, or cannot be opened is reported on stderr.  Exits with
    status 255 if any entry failed or any hash-file could not be read;
    otherwise returns normally.
    """
    failures = 0
    unreadable = False
    for fpath in args.files:
        if fpath != "-" and not os.path.isfile(fpath):
            reason = (
                "Not a regular file" if os.path.exists(fpath)
                else "No such file or directory"
            )
            print(f"{sys.argv[0]}: {fpath}: {reason}", file=sys.stderr)
            unreadable = True
            continue
        with contextlib.ExitStack() as stack:
            # Only the open itself is guarded, so errors raised while
            # verifying entries are not mistaken for an unreadable hash-file.
            try:
                f = stack.enter_context(smart_open(fpath))
            except OSError as exc:
                print(f"{sys.argv[0]}: {fpath}: {exc.strerror or exc}", file=sys.stderr)
                unreadable = True
                continue
            for raw in f:
                try:
                    line = raw.decode('utf-8')
                except UnicodeDecodeError as exc:
                    print(
                        f"{sys.argv[0]}: skipping line with encoding error: {exc}",
                        file=sys.stderr,
                    )
                    continue
                parsed = _split_check_line(line)
                if parsed is None:
                    continue
                expected, target = parsed
                spec = next(
                    (s for s in HASH_SPECS if s.digest_len == len(expected)),
                    None,
                )
                if spec is None:
                    continue
                if not os.path.isfile(target):
                    print(target, ": File not found")
                    failures += 1
                    continue
                # hash_file() reads the selection from args, so enable this
                # line's algorithm only for the call and then put the switch
                # back; otherwise every later file would also be hashed with
                # every algorithm seen so far.
                previous = getattr(args, spec.arg_attr)
                setattr(args, spec.arg_attr, True)
                try:
                    hashes = hash_file(target)
                finally:
                    setattr(args, spec.arg_attr, previous)
                if hashes is None or expected != spec.format_fn(hashes[spec.arg_attr]):
                    print(target + ": FAILED")
                    failures += 1
                else:
                    print(target + ": OK")
                sys.stdout.flush()
    if failures > 0:
        print(sys.argv[0] + ": WARNING: " + str(failures) + " checksums did not match")
    if failures > 0 or unreadable:
        sys.exit(255)
if __name__ == "__main__":
    # Restore default SIGPIPE behavior so piping to head/less doesn't traceback.
    # SIGPIPE does not exist on every platform, hence the hasattr guard.
    if hasattr(signal, "SIGPIPE"):
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    # Define switches and command-line arguments.
    parser = argparse.ArgumentParser(description="Calculate hashes")
    parser.add_argument("files", metavar="FILE", nargs="*", default=["-"], help="files to hash")
    parser.add_argument(
        "-V", "--version", action="version",
        help="print version number", version="%(prog)s v" + __version__
    )
    parser.add_argument(
        "-r", "--recursive", action="store_true",
        help="recursive mode. All subdirectories are traversed"
    )
    parser.add_argument(
        "-a",
        "--all",
        action="store_true",
        help="All (MD5, SHA1, SHA256, SHA512, SHA3-224, and SHA3-384), "
             "default if no other options chosen",
    )
    parser.add_argument(
        "-m", "--md5", action="store_true", help="MD5 signature (md5sum equivalent output)"
    )
    parser.add_argument(
        "-s", "--sha1", action="store_true", help="SHA1 signature (sha1sum equivalent output)"
    )
    parser.add_argument(
        "-2", "--sha256", action="store_true",
        help="SHA2 (aka SHA2-256) signature (sha256sum equivalent output)"
    )
    parser.add_argument("-3", "--sha3", action="store_true", help="SHA3-384 signature")
    parser.add_argument("-t", "--sha3_224", action="store_true", help="SHA3-224 signature")
    parser.add_argument(
        "-5",
        "--sha512",
        action="store_true",
        help="SHA512 (aka SHA2-512) signature (note: base64 encoded rather than hex)",
    )
    parser.add_argument(
        "-f", "--fullpath", action="store_true", help="print full path rather than relative"
    )
    parser.add_argument(
        "-B", "--block", metavar="blk", type=int, default=65536,
        help="block size to read file, default = 65536"
    )
    parser.add_argument(
        "-c", "--check", action="store_true", help="read sums from FILE and check them"
    )
    #parser.add_argument("-b", "--base", action="store_true",
    #                    help="match only basename, only valid with -c")
    parser.add_argument(
        "-p", "--psv", action="store_true", help="write output as pipe separated values"
    )
    args = parser.parse_args()

    if args.block <= 0:
        parser.error("block size must be > 0")

    # Default to --all only when no specific hash switch is given and we are
    # not in check mode; a specific switch or --check turns --all off.
    any_hash = args.md5 or args.sha1 or args.sha256 or args.sha3 or args.sha3_224 or args.sha512
    args.all = not (any_hash or args.check)

    #if args.base and not args.check:
    #    print("-b not valid without -c")
    #    sys.exit(255)

    if args.check:
        # Check mode prints "<file>: OK/FAILED" lines, not hash rows, so no
        # PSV header is emitted here.
        check_hashes()
        sys.exit(0)

    if args.psv:
        print_header()

    def _hash_and_print(target):
        """Hash *target*, print its digests, and return True if hashing failed."""
        result = hash_file(target)
        print_hashes(target, result)
        return result is None

    # Process command-line arguments.
    # pylint: disable=invalid-name
    had_error = False
    for path in args.files:
        if args.recursive and os.path.isdir(os.path.abspath(path)):
            if args.fullpath:
                path = os.path.abspath(path)
            for root, _dirs, filenames in os.walk(path):
                for filename in filenames:
                    fname = os.path.join(root, filename)
                    # Skip anything that is not a regular file.
                    if os.path.isfile(fname):
                        had_error = _hash_and_print(fname) or had_error
        elif os.path.isfile(path) or path == "-":
            had_error = _hash_and_print(path) or had_error
        elif os.path.isdir(path):
            # A directory without -r exists, so say what is actually wrong.
            print(
                f"{sys.argv[0]}: {path}: Is a directory (use -r to recurse)",
                file=sys.stderr,
            )
            had_error = True
        else:
            print(f"{sys.argv[0]}: {path}: No such file or directory", file=sys.stderr)
            had_error = True
    sys.exit(1 if had_error else 0)