# Post-patch content of swh/model/hashutil.py, reconstructed from a collapsed
# unified diff. Original patch header, kept for provenance:
#   diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
#   index a155603..bda5389 100644
#   --- a/swh/model/hashutil.py
#   +++ b/swh/model/hashutil.py
#   @@ -1,371 +1,449 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Module in charge of hashing function definitions. This is the base
module used to compute swh's hashes.

Only a subset of hashing algorithms is supported as defined in the
ALGORITHMS set. Any provided algorithms not in that list will result
in a ValueError explaining the error.

This module defines the following hashing functions:

- hash_stream: Hash the contents of something iterable (file, stream,
  ...) with the given algorithms (defaulting to DEFAULT_ALGORITHMS if
  none provided).

- hash_file: Hash the contents of the given file object with the given
  algorithms (defaulting to DEFAULT_ALGORITHMS if none provided).

- hash_data: Hash the given binary blob with the given algorithms
  (defaulting to DEFAULT_ALGORITHMS if none provided).

- hash_path: Hash the contents of the file at the given path with the
  given algorithms (defaulting to DEFAULT_ALGORITHMS if none
  provided).

"""

import binascii
import functools
import hashlib
import os

from io import BytesIO

ALGORITHMS = {'sha1', 'sha256', 'sha1_git', 'blake2s256', 'blake2b512'}
"""Hashing algorithms supported by this module"""

DEFAULT_ALGORITHMS = {'sha1', 'sha256', 'sha1_git', 'blake2s256'}
"""Algorithms computed by default when calling the functions from this module.

Subset of :const:`ALGORITHMS`.
"""

HASH_BLOCK_SIZE = 32768
"""Block size for streaming hash computations made in this module"""

_blake2_hash_cache = {}

HASH_FORMATS = {'bytes', 'bytehex', 'hex'}
"""Supported output hash formats
"""


class MultiHash:
    """Compute several hashes over the same data in a single pass.

    Args:
        hash_names (iterable of str): names of the hash algorithms to
            compute (cf. ALGORITHMS). The special name 'length' is not a
            hash: when present, the number of bytes fed through
            :meth:`update` is counted and reported next to the digests.
        length (int): total length of the data that will be hashed; it is
            forwarded to the git-specific algorithms (e.g. 'sha1_git'),
            which need it up front for their header.

    Raises:
        ValueError: if a name is not a supported algorithm, or if a
            git-specific algorithm is requested without a length.

    """

    def __init__(self, hash_names, length=None):
        # Maps each requested name to its hashlib-like object, except for
        # the 'length' key which maps to a plain int byte counter.
        self.state = {}
        self.track_length = False
        for name in hash_names:
            if name == 'length':
                self.state['length'] = 0
                self.track_length = True
            else:
                self.state[name] = _new_hash(name, length)

    @classmethod
    def from_state(cls, state, track_length):
        """Build an instance directly from an existing state.

        Args:
            state (dict): mapping as held in :attr:`state` (hash objects
                keyed by algorithm name, plus an optional int 'length')
            track_length (bool): whether 'length' must be updated

        Returns:
            MultiHash: a new instance using `state` as is (not copied).

        """
        ret = cls([])
        ret.state = state
        ret.track_length = track_length
        # The instance must be returned: copy() relies on it.
        return ret

    def update(self, chunk):
        """Feed `chunk` (bytes) to every hash and to the length counter."""
        for name, h in self.state.items():
            if name == 'length':
                # Not a hash object, handled below.
                continue
            h.update(chunk)
        if self.track_length:
            self.state['length'] += len(chunk)

    def digest(self):
        """Return a dict mapping each name to its digest as bytes
        ('length', when tracked, maps to the int byte count).
        """
        return {
            name: h.digest() if name != 'length' else h
            for name, h in self.state.items()
        }

    def hexdigest(self):
        """Return a dict mapping each name to its digest as an hexadecimal
        str ('length', when tracked, maps to the int byte count).
        """
        return {
            name: h.hexdigest() if name != 'length' else h
            for name, h in self.state.items()
        }

    def bytehexdigest(self):
        """Return a dict mapping each name to its digest as hexadecimal
        bytes ('length', when tracked, maps to the int byte count).
        """
        return {
            name: hash_to_bytehex(h.digest()) if name != 'length' else h
            for name, h in self.state.items()
        }

    def copy(self):
        """Return an independent MultiHash in the same state.

        Every hash object is duplicated with its own ``copy()``, so
        updating the copy does not affect the original (and conversely).
        """
        copied_state = {
            name: h.copy() if name != 'length' else h
            for name, h in self.state.items()
        }
        return self.from_state(copied_state, self.track_length)


def _new_blake2_hash(algo):
    """Return a freshly initialized blake2 hash object for `algo`.

    The factory resolved for a given name is memoized in
    `_blake2_hash_cache`, so the lookup below only happens once per name.

    Args:
        algo (str): a blake2 algorithm name, optionally suffixed with the
            digest size in bits (e.g. 'blake2b512', 'blake2s256')

    Raises:
        ValueError: if algo is not a blake2 name, or if its digest size is
            not an integer multiple of 8.

    """
    if algo in _blake2_hash_cache:
        return _blake2_hash_cache[algo]()

    lalgo = algo.lower()
    if not lalgo.startswith('blake2'):
        raise ValueError('Algorithm %s is not a blake2 hash' % algo)

    # 'blake2b' or 'blake2s'; whatever follows is the digest size in bits.
    blake_family = lalgo[:7]

    digest_size = None
    if lalgo[7:]:
        try:
            digest_size, remainder = divmod(int(lalgo[7:]), 8)
        except ValueError:
            raise ValueError(
                'Unknown digest size for algo %s' % algo
            ) from None
        if remainder:
            raise ValueError(
                'Digest size for algorithm %s must be a multiple of 8' % algo
            )

    if lalgo in hashlib.algorithms_available:
        # Handle the case where OpenSSL ships the given algorithm
        # (e.g. Python 3.5 on Debian 9 stretch)
        _blake2_hash_cache[algo] = lambda: hashlib.new(lalgo)
    else:
        # Try using the built-in implementation for Python 3.6+
        if blake_family in hashlib.algorithms_available:
            blake2 = getattr(hashlib, blake_family)
        else:
            import pyblake2
            blake2 = getattr(pyblake2, blake_family)

        _blake2_hash_cache[algo] = lambda: blake2(digest_size=digest_size)

    return _blake2_hash_cache[algo]()


def _new_hashlib_hash(algo):
    """Initialize a digest object from hashlib.

    Handle the swh-specific names for the blake2-related algorithms
    """
    if algo.startswith('blake2'):
        return _new_blake2_hash(algo)
    else:
        return hashlib.new(algo)


def _new_git_hash(base_algo, git_type, length):
    """Initialize a digest object (as returned by python's hashlib) for the
    requested algorithm, and feed it with the header for a git object of the
    given type and length.

    The header for hashing a git object consists of:
     - The type of the object (encoded in ASCII)
     - One ASCII space (\\x20)
     - The length of the object (decimal encoded in ASCII)
     - One NUL byte

    Args:
        base_algo (str from :const:`ALGORITHMS`): a hashlib-supported algorithm
        git_type: the type of the git object (supposedly one of 'blob',
                  'commit', 'tag', 'tree')
        length: the length of the git object you're encoding

    Returns:
        a hashutil.hash object
    """

    h = _new_hashlib_hash(base_algo)
    git_header = '%s %d\0' % (git_type, length)
    h.update(git_header.encode('ascii'))

    return h


def _new_hash(algo, length=None):
    """Initialize a digest object (as returned by python's hashlib) for
    the requested algorithm. See the constant ALGORITHMS for the list
    of supported algorithms. If a git-specific hashing algorithm is
    requested (e.g., "sha1_git"), the hashing object will be pre-fed
    with the needed header; for this to work, length must be given.

    Args:
        algo (str): a hashing algorithm (one of ALGORITHMS)
        length (int): the length of the hashed payload (needed for
          git-specific algorithms)

    Returns:
        a hashutil.hash object

    Raises:
        ValueError if algo is unknown, or length is missing for a git-specific
        hash.

    """
    if algo not in ALGORITHMS:
        raise ValueError(
            'Unexpected hashing algorithm %s, expected one of %s' %
            (algo, ', '.join(sorted(ALGORITHMS))))

    if algo.endswith('_git'):
        if length is None:
            raise ValueError('Missing length for git hashing algorithm')
        # Strip the '_git' suffix to get the underlying hashlib name.
        base_algo = algo[:-4]
        return _new_git_hash(base_algo, 'blob', length)

    return _new_hashlib_hash(algo)


def _read(fobj):
    """Default reader: return the next chunk (at most HASH_BLOCK_SIZE) read
    from the file-like object `fobj`.

    """
    return fobj.read(HASH_BLOCK_SIZE)


def hash_stream(s, readfn=_read, length=None, algorithms=DEFAULT_ALGORITHMS,
                chunk_cb=None, hash_format='bytes'):
    """Hash the contents of a stream

    Args:
        s: stream or object we can consume by successive call using `readfn`
        readfn (fn): Function to read chunk data from s; reading stops at
            the first falsy chunk (e.g. b'' or None)
        length (int): the length of the contents of the object (for the
            git-specific algorithms)
        algorithms (set): the hashing algorithms to be used, as an
            iterable over strings
        chunk_cb (fn): optional callback, called with each chunk once it
            has been hashed
        hash_format (str): Format required for the output of the
            computed hashes (cf. HASH_FORMATS)

    Returns: a dict mapping each algorithm to a digest (bytes by default).

    Raises:
        ValueError if:

            algorithms contains an unknown hash algorithm.
            hash_format is an unknown hash format

    """
    if hash_format not in HASH_FORMATS:
        raise ValueError('Unexpected hash format %s, expected one of %s' % (
            hash_format, HASH_FORMATS))

    h = MultiHash(algorithms, length)
    while True:
        chunk = readfn(s)
        if not chunk:
            break
        h.update(chunk)
        if chunk_cb:
            chunk_cb(chunk)

    if hash_format == 'bytes':
        return h.digest()
    if hash_format == 'bytehex':
        return h.bytehexdigest()
    return h.hexdigest()


def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
              chunk_cb=None, hash_format='bytes'):
    """Hash the contents of the given file object with the given algorithms.

    Args:
        fobj: a file-like object
        length: the length of the contents of the file-like object (for the
          git-specific algorithms)
        algorithms: the hashing algorithms to be used, as an iterable over
          strings
        chunk_cb (fn): optional callback, called with each chunk once it
          has been hashed
        hash_format (str): Format required for the output of the
          computed hashes (cf. HASH_FORMATS)

    Returns: a dict mapping each algorithm to a digest (bytes by default).

    Raises:
        ValueError if:

            algorithms contains an unknown hash algorithm.
            hash_format is an unknown hash format

    """
    return hash_stream(fobj, length=length, algorithms=algorithms,
                       chunk_cb=chunk_cb, hash_format=hash_format)


def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
              hash_format='bytes', track_length=True):
    """Hash the contents of the file at the given path with the given
       algorithms.

    Args:
        path (str): the path of the file to hash
        algorithms (set): the hashing algorithms used
        chunk_cb (def): a callback
        hash_format (str): Format required for the output of the
          computed hashes (cf. HASH_FORMATS)
        track_length (bool): when True (the default), a 'length' key
          holding the number of bytes read is added to the result

    Returns: a dict mapping each algorithm to a bytes digest.

    Raises:
        ValueError if:

            algorithms contains an unknown hash algorithm.
            hash_format is an unknown hash format

        OSError on file access error

    """
    if track_length:
        # Build a new set: the caller's (or the default) one is left intact.
        algorithms = {'length'}.union(algorithms)
    length = os.path.getsize(path)
    with open(path, 'rb') as fobj:
        return hash_file(fobj, length, algorithms, chunk_cb=chunk_cb,
                         hash_format=hash_format)


def hash_data(data, algorithms=DEFAULT_ALGORITHMS, hash_format='bytes'):
    """Hash the given binary blob with the given algorithms.

    Args:
        data (bytes): raw content to hash
        algorithms (list): the hashing algorithms used
        hash_format (str): Format required for the output of the
          computed hashes (cf. HASH_FORMATS)

    Returns: a dict mapping each algorithm to a bytes digest

    Raises:
        TypeError if data does not support the buffer interface.
        ValueError if:

            algorithms contains an unknown hash algorithm.
            hash_format is an unknown hash format

    """
    fobj = BytesIO(data)
    length = len(data)
    return hash_file(fobj, length, algorithms, hash_format=hash_format)


def hash_git_data(data, git_type, base_algo='sha1'):
    """Hash the given data as a git object of type git_type.

    Args:
        data: a bytes object
        git_type: the git object type
        base_algo: the base hashing algorithm used (default: sha1)

    Returns: the digest of the git object, as bytes

    Raises:
        ValueError if the git_type is unexpected.
    """

    git_object_types = {'blob', 'tree', 'commit', 'tag', 'snapshot'}

    if git_type not in git_object_types:
        raise ValueError('Unexpected git object type %s, expected one of %s' %
                         (git_type, ', '.join(sorted(git_object_types))))

    h = _new_git_hash(base_algo, git_type, len(data))
    h.update(data)

    return h.digest()


@functools.lru_cache()
def hash_to_hex(hash):
    """Converts a hash (in hex or bytes form) to its hexadecimal ascii form

    Args:
      hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
        the hexadecimal form of the hash

    Returns:
      str: the hexadecimal form of the hash
    """
    if isinstance(hash, str):
        return hash
    return binascii.hexlify(hash).decode('ascii')


@functools.lru_cache()
def hash_to_bytehex(hash):
    """Converts a hash to its hexadecimal bytes representation

    Args:
      hash (bytes): a :class:`bytes` hash

    Returns:
      bytes: the hexadecimal form of the hash, as :class:`bytes`
    """
    return binascii.hexlify(hash)


@functools.lru_cache()
def hash_to_bytes(hash):
    """Converts a hash (in hex or bytes form) to its raw bytes form

    Args:
      hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
        the hexadecimal form of the hash

    Returns:
      bytes: the :class:`bytes` form of the hash
    """
    if isinstance(hash, bytes):
        return hash
    return bytes.fromhex(hash)


@functools.lru_cache()
def bytehex_to_hash(hex):
    """Converts a hexadecimal bytes representation of a hash to that hash

    Args:
      hex (bytes): a :class:`bytes` containing the hexadecimal form of the
        hash encoded in ascii

    Returns:
      bytes: the :class:`bytes` form of the hash
    """
    return hash_to_bytes(hex.decode())


# The patch continues with swh/model/tests/test_hashutil.py. Its header and
# the beginning of that file's license comment, kept for provenance:
#   diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
#   index 99bd78e..4b0efa5 100644
#   --- a/swh/model/tests/test_hashutil.py
#   +++ b/swh/model/tests/test_hashutil.py
#   @@ -1,359 +1,389 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU
General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import io import os import tempfile import unittest from nose.tools import istest from unittest.mock import patch from swh.model import hashutil class Hashutil(unittest.TestCase): def setUp(self): # Reset function cache hashutil._blake2_hash_cache = {} self.data = b'1984\n' self.hex_checksums = { 'sha1': '62be35bf00ff0c624f4a621e2ea5595a049e0731', 'sha1_git': '568aaf43d83b2c3df8067f3bedbb97d83260be6d', 'sha256': '26602113b4b9afd9d55466b08580d3c2' '4a9b50ee5b5866c0d91fab0e65907311', 'blake2s256': '63cfb259e1fdb485bc5c55749697a6b21ef31fb7445f6c78a' 'c9422f9f2dc8906', } self.checksums = { type: bytes.fromhex(cksum) for type, cksum in self.hex_checksums.items() } + self.bytehex_checksums = { + type: hashutil.hash_to_bytehex(cksum) + for type, cksum in self.checksums.items() + } + self.git_hex_checksums = { 'blob': self.hex_checksums['sha1_git'], 'tree': '5b2e883aa33d2efab98442693ea4dd5f1b8871b0', 'commit': '79e4093542e72f0fcb7cbd75cb7d270f9254aa8f', 'tag': 'd6bf62466f287b4d986c545890716ce058bddf67', } self.git_checksums = { type: bytes.fromhex(cksum) for type, cksum in self.git_hex_checksums.items() } @istest def hash_data(self): checksums = hashutil.hash_data(self.data) self.assertEqual(checksums, self.checksums) self.assertFalse('length' in checksums) @istest def hash_data_with_length(self): expected_checksums = self.checksums.copy() expected_checksums['length'] = len(self.data) - checksums = hashutil.hash_data(self.data, with_length=True) + algos = set(['length']).union(hashutil.DEFAULT_ALGORITHMS) + checksums = hashutil.hash_data(self.data, algorithms=algos) self.assertEqual(checksums, expected_checksums) self.assertTrue('length' in checksums) @istest def hash_data_unknown_hash(self): with self.assertRaises(ValueError) as cm: hashutil.hash_data(self.data, ['unknown-hash']) self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) 
self.assertIn('unknown-hash', cm.exception.args[0]) + @istest + def hash_data_unknown_hash_format(self): + with self.assertRaises(ValueError) as cm: + hashutil.hash_data( + self.data, hashutil.DEFAULT_ALGORITHMS, + hash_format='unknown-format') + + self.assertIn('Unexpected hash format', cm.exception.args[0]) + self.assertIn('unknown-format', cm.exception.args[0]) + @istest def hash_git_data(self): checksums = { git_type: hashutil.hash_git_data(self.data, git_type) for git_type in self.git_checksums } self.assertEqual(checksums, self.git_checksums) @istest def hash_git_data_unknown_git_type(self): with self.assertRaises(ValueError) as cm: hashutil.hash_git_data(self.data, 'unknown-git-type') self.assertIn('Unexpected git object type', cm.exception.args[0]) self.assertIn('unknown-git-type', cm.exception.args[0]) @istest def hash_file(self): fobj = io.BytesIO(self.data) checksums = hashutil.hash_file(fobj, length=len(self.data)) self.assertEqual(checksums, self.checksums) @istest def hash_file_hexdigest(self): fobj = io.BytesIO(self.data) - checksums = hashutil.hash_file(fobj, length=len(self.data), - hexdigest=True) + checksums = hashutil.hash_file( + fobj, length=len(self.data), hash_format='hex') self.assertEqual(checksums, self.hex_checksums) + @istest + def hash_file_bytehexdigest(self): + fobj = io.BytesIO(self.data) + checksums = hashutil.hash_file( + fobj, length=len(self.data), hash_format='bytehex') + self.assertEqual(checksums, self.bytehex_checksums) + @istest def hash_stream(self): class StreamStub: def __init__(self, data): self.data = data def iter_content(self): yield from io.BytesIO(self.data) - s = StreamStub(self.data) - checksums = hashutil.hash_stream(s, length=len(self.data), - hexdigest=True) + s = StreamStub(self.data).iter_content() + + def _readfn(s): + try: + return next(s) + except StopIteration: + return None + + checksums = hashutil.hash_stream( + s, readfn=_readfn, length=len(self.data), hash_format='hex') self.assertEqual(checksums, 
self.hex_checksums) @istest def hash_file_missing_length(self): fobj = io.BytesIO(self.data) with self.assertRaises(ValueError) as cm: hashutil.hash_file(fobj, algorithms=['sha1_git']) self.assertIn('Missing length', cm.exception.args[0]) @istest def hash_path(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.write(self.data) hashes = hashutil.hash_path(f.name) os.remove(f.name) self.checksums['length'] = len(self.data) self.assertEquals(self.checksums, hashes) @istest def hash_to_hex(self): for type in self.checksums: hex = self.hex_checksums[type] hash = self.checksums[type] self.assertEquals(hashutil.hash_to_hex(hex), hex) self.assertEquals(hashutil.hash_to_hex(hash), hex) @istest def hash_to_bytes(self): for type in self.checksums: hex = self.hex_checksums[type] hash = self.checksums[type] self.assertEquals(hashutil.hash_to_bytes(hex), hash) self.assertEquals(hashutil.hash_to_bytes(hash), hash) @istest def hash_to_bytehex(self): for algo in self.checksums: self.assertEqual(self.hex_checksums[algo].encode('ascii'), hashutil.hash_to_bytehex(self.checksums[algo])) @istest def bytehex_to_hash(self): for algo in self.checksums: self.assertEqual(self.checksums[algo], hashutil.bytehex_to_hash( self.hex_checksums[algo].encode())) @istest def new_hash_unsupported_hashing_algorithm(self): try: hashutil._new_hash('blake2:10') except ValueError as e: self.assertEquals(str(e), 'Unexpected hashing algorithm blake2:10, ' 'expected one of blake2b512, blake2s256, ' 'sha1, sha1_git, sha256') @patch('hashlib.new') @istest def new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new): if 'blake2b512' not in hashlib.algorithms_available: self.skipTest('blake2b512 not built-in') mock_hashlib_new.return_value = sentinel = object() h = hashutil._new_hash('blake2b512') self.assertIs(h, sentinel) mock_hashlib_new.assert_called_with('blake2b512') @patch('hashlib.new') @istest def new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new): if 'blake2s256' not in 
hashlib.algorithms_available: self.skipTest('blake2s256 not built-in') mock_hashlib_new.return_value = sentinel = object() h = hashutil._new_hash('blake2s256') self.assertIs(h, sentinel) mock_hashlib_new.assert_called_with('blake2s256') @istest def new_hash_blake2b_builtin(self): removed_hash = False try: if 'blake2b512' in hashlib.algorithms_available: removed_hash = True hashlib.algorithms_available.remove('blake2b512') if 'blake2b' not in hashlib.algorithms_available: self.skipTest('blake2b not built in') with patch('hashlib.blake2b') as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash('blake2b512') self.assertIs(h, sentinel) mock_blake2b.assert_called_with(digest_size=512//8) finally: if removed_hash: hashlib.algorithms_available.add('blake2b512') @istest def new_hash_blake2s_builtin(self): removed_hash = False try: if 'blake2s256' in hashlib.algorithms_available: removed_hash = True hashlib.algorithms_available.remove('blake2s256') if 'blake2s' not in hashlib.algorithms_available: self.skipTest('blake2s not built in') with patch('hashlib.blake2s') as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash('blake2s256') self.assertIs(h, sentinel) mock_blake2s.assert_called_with(digest_size=256//8) finally: if removed_hash: hashlib.algorithms_available.add('blake2s256') @istest def new_hash_blake2b_pyblake2(self): if 'blake2b512' in hashlib.algorithms_available: self.skipTest('blake2b512 built in') if 'blake2b' in hashlib.algorithms_available: self.skipTest('blake2b built in') with patch('pyblake2.blake2b') as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash('blake2b512') self.assertIs(h, sentinel) mock_blake2b.assert_called_with(digest_size=512//8) @istest def new_hash_blake2s_pyblake2(self): if 'blake2s256' in hashlib.algorithms_available: self.skipTest('blake2s256 built in') if 'blake2s' in hashlib.algorithms_available: self.skipTest('blake2s built in') with 
patch('pyblake2.blake2s') as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash('blake2s256') self.assertIs(h, sentinel) mock_blake2s.assert_called_with(digest_size=256//8) class HashlibGit(unittest.TestCase): def setUp(self): self.blob_data = b'42\n' self.tree_data = b''.join([b'40000 barfoo\0', bytes.fromhex('c3020f6bf135a38c6df' '3afeb5fb38232c5e07087'), b'100644 blah\0', bytes.fromhex('63756ef0df5e4f10b6efa' '33cfe5c758749615f20'), b'100644 hello\0', bytes.fromhex('907b308167f0880fb2a' '5c0e1614bb0c7620f9dc3')]) self.commit_data = """tree 1c61f7259dcb770f46b194d941df4f08ff0a3970 author Antoine R. Dumont (@ardumont) 1444054085 +0200 committer Antoine R. Dumont (@ardumont) 1444054085 +0200 initial """.encode('utf-8') # NOQA self.tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241 type commit tag 0.0.1 tagger Antoine R. Dumont (@ardumont) 1444225145 +0200 blah """.encode('utf-8') # NOQA self.checksums = { 'blob_sha1_git': bytes.fromhex('d81cc0710eb6cf9efd5b920a8453e1' 'e07157b6cd'), 'tree_sha1_git': bytes.fromhex('ac212302c45eada382b27bfda795db' '121dacdb1c'), 'commit_sha1_git': bytes.fromhex('e960570b2e6e2798fa4cfb9af2c399' 'd629189653'), 'tag_sha1_git': bytes.fromhex('bc2b99ba469987bcf1272c189ed534' 'e9e959f120'), } @istest def unknown_header_type(self): with self.assertRaises(ValueError) as cm: hashutil.hash_git_data(b'any-data', 'some-unknown-type') self.assertIn('Unexpected git object type', cm.exception.args[0]) @istest def hashdata_content(self): # when actual_hash = hashutil.hash_git_data(self.blob_data, git_type='blob') # then self.assertEqual(actual_hash, self.checksums['blob_sha1_git']) @istest def hashdata_tree(self): # when actual_hash = hashutil.hash_git_data(self.tree_data, git_type='tree') # then self.assertEqual(actual_hash, self.checksums['tree_sha1_git']) @istest def hashdata_revision(self): # when actual_hash = hashutil.hash_git_data(self.commit_data, git_type='commit') # then 
self.assertEqual(actual_hash, self.checksums['commit_sha1_git']) @istest def hashdata_tag(self): # when actual_hash = hashutil.hash_git_data(self.tag_data, git_type='tag') # then self.assertEqual(actual_hash, self.checksums['tag_sha1_git'])