Page MenuHomeSoftware Heritage

D410.id1273.diff
No OneTemporary

D410.id1273.diff

diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -162,7 +162,8 @@
return _new_hashlib_hash(algo)
-def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
+def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
+ chunk_cb=None, with_length=False, hexdigest=False):
"""Hash the contents of the given file object with the given algorithms.
Args:
@@ -171,11 +172,15 @@
git-specific algorithms)
algorithms: the hashing algorithms to be used, as an iterable over
strings
+ with_length (bool): if True, add a 'length' entry to the result dict
+ hexdigest (bool): if True, return digests as hex strings; if False
+ (the default), return them as bytes
- Returns: a dict mapping each algorithm to a bytes digest.
+ Returns: a dict mapping each algorithm to a digest (bytes by default).
Raises:
ValueError if algorithms contains an unknown hash algorithm.
+
"""
hashes = {algo: _new_hash(algo, length) for algo in algorithms}
@@ -188,10 +193,56 @@
if chunk_cb:
chunk_cb(chunk)
- return {algo: hash.digest() for algo, hash in hashes.items()}
+ if hexdigest:
+ h = {algo: hash.hexdigest() for algo, hash in hashes.items()}
+ else:
+ h = {algo: hash.digest() for algo, hash in hashes.items()}
+ if with_length:
+ h['length'] = length
+ return h
+
+
+def hash_stream(s, length=None, algorithms=DEFAULT_ALGORITHMS,
+ chunk_cb=None, with_length=False, hexdigest=False):
+ """Hash the contents of the given stream with the given algorithms.
+
+ Args:
+ s (stream): a stream object (e.g. requests.get(stream=True))
+ length (int): the length of the contents of the stream (for the
+ git-specific algorithms)
+ algorithms (iterable): the hashing algorithms to be used, as an
+ iterable over strings
+ with_length (bool): if True, add a 'length' entry to the result dict
+ hexdigest (bool): if True, return digests as hex strings; if False
+ (the default), return them as bytes
+
+ Returns: a dict mapping each algorithm to a digest (bytes by default).
+
+ Raises:
+ ValueError if algorithms contains an unknown hash algorithm.
+
+ """
+ hashes = {algo: _new_hash(algo, length) for algo in algorithms}
+
+ for chunk in s.iter_content():
+ if not chunk:
+ break
+ for hash in hashes.values():
+ hash.update(chunk)
+ if chunk_cb:
+ chunk_cb(chunk)
+
+ if hexdigest:
+ h = {algo: hash.hexdigest() for algo, hash in hashes.items()}
+ else:
+ h = {algo: hash.digest() for algo, hash in hashes.items()}
+ if with_length:
+ h['length'] = length
+ return h
-def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
+def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
+ with_length=True, hexdigest=False):
"""Hash the contents of the file at the given path with the given
algorithms.
@@ -199,6 +250,9 @@
path: the path of the file to hash
algorithms: the hashing algorithms used
chunk_cb: a callback
+ with_length (bool): if True, add a 'length' entry to the result dict
+ hexdigest (bool): if True, return digests as hex strings; if False
+ (the default), return them as bytes
Returns: a dict mapping each algorithm to a bytes digest.
@@ -209,9 +263,8 @@
"""
length = os.path.getsize(path)
with open(path, 'rb') as fobj:
- hash = hash_file(fobj, length, algorithms, chunk_cb)
- hash['length'] = length
- return hash
+ return hash_file(fobj, length, algorithms, chunk_cb=chunk_cb,
+ with_length=with_length, hexdigest=hexdigest)
def hash_data(data, algorithms=DEFAULT_ALGORITHMS, with_length=False):
@@ -230,10 +283,7 @@
"""
fobj = BytesIO(data)
length = len(data)
- data = hash_file(fobj, length, algorithms)
- if with_length:
- data['length'] = length
- return data
+ return hash_file(fobj, length, algorithms, with_length=with_length)
def hash_git_data(data, git_type, base_algo='sha1'):
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -95,6 +95,27 @@
checksums = hashutil.hash_file(fobj, length=len(self.data))
self.assertEqual(checksums, self.checksums)
+ @istest
+ def hash_file_hexdigest(self):
+ fobj = io.BytesIO(self.data)
+ checksums = hashutil.hash_file(fobj, length=len(self.data),
+ hexdigest=True)
+ self.assertEqual(checksums, self.hex_checksums)
+
+ @istest
+ def hash_stream(self):
+ class StreamStub:
+ def __init__(self, data):
+ self.data = data
+
+ def iter_content(self):
+ yield from io.BytesIO(self.data)
+
+ s = StreamStub(self.data)
+ checksums = hashutil.hash_stream(s, length=len(self.data),
+ hexdigest=True)
+ self.assertEqual(checksums, self.hex_checksums)
+
@istest
def hash_file_missing_length(self):
fobj = io.BytesIO(self.data)

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 11:35 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3232238

Event Timeline