Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124196
D410.id1273.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D410.id1273.diff
View Options
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -162,7 +162,8 @@
return _new_hashlib_hash(algo)
-def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
+def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
+ chunk_cb=None, with_length=False, hexdigest=False):
"""Hash the contents of the given file object with the given algorithms.
Args:
@@ -171,11 +172,15 @@
git-specific algorithms)
algorithms: the hashing algorithms to be used, as an iterable over
strings
+ with_length (bool): Include length in the dict result
+ hexdigest (bool): if True, return each digest as a hex string;
+ otherwise return it as raw bytes (the default)
- Returns: a dict mapping each algorithm to a bytes digest.
+ Returns: a dict mapping each algorithm to a digest (bytes by default).
Raises:
ValueError if algorithms contains an unknown hash algorithm.
+
"""
hashes = {algo: _new_hash(algo, length) for algo in algorithms}
@@ -188,10 +193,56 @@
if chunk_cb:
chunk_cb(chunk)
- return {algo: hash.digest() for algo, hash in hashes.items()}
+ if hexdigest:
+ h = {algo: hash.hexdigest() for algo, hash in hashes.items()}
+ else:
+ h = {algo: hash.digest() for algo, hash in hashes.items()}
+ if with_length:
+ h['length'] = length
+ return h
+
+
+def hash_stream(s, length=None, algorithms=DEFAULT_ALGORITHMS,
+ chunk_cb=None, with_length=False, hexdigest=False):
+ """Hash the contents of the given stream with the given algorithms.
+
+ Args:
+ s (stream): a stream object (e.g. requests.get(stream=True))
+ length (int): the length of the contents of the stream (for the
+ git-specific algorithms)
+ algorithms (iterable): the hashing algorithms to be used, as an
+ iterable over strings
+ with_length (bool): Include length in the dict result
+ hexdigest (bool): if True, return each digest as a hex string;
+ otherwise return it as raw bytes (the default)
+
+ Returns: a dict mapping each algorithm to a digest (bytes by default).
+
+ Raises:
+ ValueError if algorithms contains an unknown hash algorithm.
+
+ """
+ hashes = {algo: _new_hash(algo, length) for algo in algorithms}
+
+ for chunk in s.iter_content():
+ if not chunk:
+ break
+ for hash in hashes.values():
+ hash.update(chunk)
+ if chunk_cb:
+ chunk_cb(chunk)
+
+ if hexdigest:
+ h = {algo: hash.hexdigest() for algo, hash in hashes.items()}
+ else:
+ h = {algo: hash.digest() for algo, hash in hashes.items()}
+ if with_length:
+ h['length'] = length
+ return h
-def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
+def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
+ with_length=True, hexdigest=False):
"""Hash the contents of the file at the given path with the given
algorithms.
@@ -199,6 +250,9 @@
path: the path of the file to hash
algorithms: the hashing algorithms used
chunk_cb: a callback
+ with_length (bool): Include length in the dict result
+ hexdigest (bool): if True, return each digest as a hex string;
+ otherwise return it as raw bytes (the default)
Returns: a dict mapping each algorithm to a bytes digest.
@@ -209,9 +263,8 @@
"""
length = os.path.getsize(path)
with open(path, 'rb') as fobj:
- hash = hash_file(fobj, length, algorithms, chunk_cb)
- hash['length'] = length
- return hash
+ return hash_file(fobj, length, algorithms, chunk_cb=chunk_cb,
+ with_length=with_length, hexdigest=hexdigest)
def hash_data(data, algorithms=DEFAULT_ALGORITHMS, with_length=False):
@@ -230,10 +283,7 @@
"""
fobj = BytesIO(data)
length = len(data)
- data = hash_file(fobj, length, algorithms)
- if with_length:
- data['length'] = length
- return data
+ return hash_file(fobj, length, algorithms, with_length=with_length)
def hash_git_data(data, git_type, base_algo='sha1'):
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -95,6 +95,27 @@
checksums = hashutil.hash_file(fobj, length=len(self.data))
self.assertEqual(checksums, self.checksums)
+ @istest
+ def hash_file_hexdigest(self):
+ fobj = io.BytesIO(self.data)
+ checksums = hashutil.hash_file(fobj, length=len(self.data),
+ hexdigest=True)
+ self.assertEqual(checksums, self.hex_checksums)
+
+ @istest
+ def hash_stream(self):
+ class StreamStub:
+ def __init__(self, data):
+ self.data = data
+
+ def iter_content(self):
+ yield from io.BytesIO(self.data)
+
+ s = StreamStub(self.data)
+ checksums = hashutil.hash_stream(s, length=len(self.data),
+ hexdigest=True)
+ self.assertEqual(checksums, self.hex_checksums)
+
@istest
def hash_file_missing_length(self):
fobj = io.BytesIO(self.data)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 11:35 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3232238
Attached To
D410: model.hashutil: Open new endpoint to allow to hash stream
Event Timeline
Log In to Comment