# Reconstructed post-patch content of swh/model/hashutil.py
# (source patch: diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py,
#  index 339b72f..ee29b6e, hunk @@ -1,440 +1,435 @@)

# Copyright (C) 2015-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Module in charge of hashing function definitions.

This is the base module used to compute swh's hashes.

Only a subset of hashing algorithms is supported, as defined in the
ALGORITHMS set. Any provided algorithm not in that set results in a
ValueError explaining the error.

This module defines:

- MultiHash: compute several hashes (and optionally the length) of the
  same data in a single pass, with `from_file`, `from_path` and
  `from_data` alternate constructors.

- hash_file, hash_path, hash_data: deprecated function wrappers hashing
  respectively a file object, the file at a given path and a binary blob
  (all defaulting to DEFAULT_ALGORITHMS if no algorithm is provided).

- hash_git_data: hash a blob as a git object of a given type.

- hash_to_hex, hash_to_bytehex, hash_to_bytes, bytehex_to_hash:
  conversions between the representations of a hash.

"""

import binascii
import functools
import hashlib
import os

from io import BytesIO

ALGORITHMS = set(['sha1', 'sha256', 'sha1_git', 'blake2s256', 'blake2b512'])
"""Hashing algorithms supported by this module"""

DEFAULT_ALGORITHMS = set(['sha1', 'sha256', 'sha1_git', 'blake2s256'])
"""Algorithms computed by default when calling the functions from this module.

Subset of :const:`ALGORITHMS`.
"""

HASH_BLOCK_SIZE = 32768
"""Block size for streaming hash computations made in this module"""

# Maps an algorithm name to a zero-argument factory building a fresh hash
# object; filled lazily by _new_blake2_hash.
_blake2_hash_cache = {}

HASH_FORMATS = set(['bytes', 'bytehex', 'hex'])
"""Supported output hash formats
"""

EXTRA_LENGTH = set(['length'])
"""Extra information to compute
"""


def _iter_chunks(source):
    """Yield the successive chunks of `source`.

    When `source` exposes a `read` method, it is read by blocks of
    HASH_BLOCK_SIZE until `read` returns an empty value, which bounds the
    size of each chunk. Otherwise `source` is iterated as is, so an
    iterable of byte chunks is supported as well.

    """
    read = getattr(source, 'read', None)
    if read is None:
        yield from source
        return

    while True:
        chunk = read(HASH_BLOCK_SIZE)
        if not chunk:
            break
        yield chunk


class MultiHash:
    """Hashutil class to support multiple hashes computation.

    Args:

        hash_names (iterable of str): hash algorithms to compute (cf.
            DEFAULT_ALGORITHMS). If 'length' is provided among them, the
            number of bytes fed to the instance is also computed and
            returned along with the hashes.
        length (int): total length of the data that will be hashed; it is
            forwarded to the hash constructors and is mandatory for the
            git-specific algorithms (e.g. 'sha1_git').

    Raises:
        ValueError if an algorithm is unknown, or if `length` is missing
        for a git-specific algorithm.

    """
    def __init__(self, hash_names=DEFAULT_ALGORITHMS, length=None):
        # name -> hash object, except for 'length' which maps to an int
        self.state = {}
        self.track_length = False
        for name in hash_names:
            if name == 'length':
                self.state['length'] = 0
                self.track_length = True
            else:
                self.state[name] = _new_hash(name, length)

    @classmethod
    def from_state(cls, state, track_length):
        """Build an instance around an already existing `state` dict.

        Args:
            state (dict): mapping of algorithm name to hash object (and
                'length' to an int when the length is tracked)
            track_length (bool): whether `update` must keep counting bytes

        Returns:
            a new MultiHash using `state` as its internal state

        """
        ret = cls([])
        ret.state = state
        ret.track_length = track_length
        # The instance must be returned, otherwise `copy` yields None.
        return ret

    @classmethod
    def from_file(cls, file, hash_names=DEFAULT_ALGORITHMS, length=None):
        """Build an instance and feed it with the whole content of `file`.

        Args:
            file: a binary file-like object (read by blocks of
                HASH_BLOCK_SIZE), or an iterable of byte chunks
            hash_names: the algorithms to compute
            length (int): length of the content (needed for the
                git-specific algorithms)

        Returns:
            the MultiHash instance, once all the content has been hashed

        """
        ret = cls(length=length, hash_names=hash_names)
        for chunk in _iter_chunks(file):
            ret.update(chunk)
        return ret

    @classmethod
    def from_path(cls, path, hash_names=DEFAULT_ALGORITHMS, length=None,
                  track_length=True):
        """Build an instance and feed it with the file found at `path`.

        Args:
            path (str): path of the file to hash
            hash_names: the algorithms to compute
            length (int): length of the file; read from the filesystem when
                not provided (or 0)
            track_length (bool): also compute the 'length' entry

        Returns:
            the MultiHash instance, once the whole file has been hashed

        Raises:
            OSError on file access error

        """
        if not length:
            length = os.path.getsize(path)
        # For compatibility reason with `hash_path`
        if track_length:
            # set() first so that lists and tuples are accepted as well
            hash_names = set(hash_names).union(EXTRA_LENGTH)
        with open(path, 'rb') as f:
            return cls.from_file(f, hash_names=hash_names, length=length)

    @classmethod
    def from_data(cls, data, hash_names=DEFAULT_ALGORITHMS, length=None):
        """Build an instance and feed it with the binary blob `data`.

        Args:
            data (bytes): raw content to hash
            hash_names: the algorithms to compute
            length (int): length of the content; defaults to `len(data)`
                when not provided (or 0)

        Returns:
            the MultiHash instance, once `data` has been hashed

        """
        if not length:
            length = len(data)
        fobj = BytesIO(data)
        return cls.from_file(fobj, hash_names=hash_names, length=length)

    def update(self, chunk):
        """Feed every hash object (and the length counter) with `chunk`."""
        for name, h in self.state.items():
            if name == 'length':
                continue
            h.update(chunk)
        if self.track_length:
            self.state['length'] += len(chunk)

    def digest(self):
        """Return a dict of algorithm name to digest as bytes.

        The 'length' entry, if any, is returned as an int.

        """
        return {
            name: h.digest() if name != 'length' else h
            for name, h in self.state.items()
        }

    def hexdigest(self):
        """Return a dict of algorithm name to digest as an hexadecimal str.

        The 'length' entry, if any, is returned as an int.

        """
        return {
            name: h.hexdigest() if name != 'length' else h
            for name, h in self.state.items()
        }

    def bytehexdigest(self):
        """Return a dict of algorithm name to hexadecimal digest as bytes.

        The 'length' entry, if any, is returned as an int.

        """
        return {
            name: hash_to_bytehex(h.digest()) if name != 'length' else h
            for name, h in self.state.items()
        }

    def copy(self):
        """Return an independent copy of this instance.

        Each hash object is copied, so that updating the copy does not
        alter this instance (and conversely).

        """
        copied_state = {
            name: h.copy() if name != 'length' else h
            for name, h in self.state.items()
        }
        return self.from_state(copied_state, self.track_length)


def _new_blake2_hash(algo):
    """Return a new blake2 hash object for the algorithm named `algo`.

    The name is made of the blake2 family ('blake2b' or 'blake2s')
    optionally followed by the digest size in bits (e.g. 'blake2s256').
    The factory building the hash object is cached in _blake2_hash_cache.

    Raises:
        ValueError if `algo` is not a blake2 algorithm, or if its digest
        size is not an integer multiple of 8.

    """
    if algo in _blake2_hash_cache:
        return _blake2_hash_cache[algo]()

    lalgo = algo.lower()
    if not lalgo.startswith('blake2'):
        raise ValueError('Algorithm %s is not a blake2 hash' % algo)

    blake_family = lalgo[:7]

    digest_size = None
    if lalgo[7:]:
        try:
            # The name holds a size in bits, the constructors want bytes
            digest_size, remainder = divmod(int(lalgo[7:]), 8)
        except ValueError:
            raise ValueError(
                'Unknown digest size for algo %s' % algo
            ) from None
        if remainder:
            raise ValueError(
                'Digest size for algorithm %s must be a multiple of 8' % algo
            )

    if lalgo in hashlib.algorithms_available:
        # Handle the case where OpenSSL ships the given algorithm
        # (e.g. Python 3.5 on Debian 9 stretch)
        _blake2_hash_cache[algo] = lambda: hashlib.new(lalgo)
    else:
        # Try using the built-in implementation for Python 3.6+
        if blake_family in hashlib.algorithms_available:
            blake2 = getattr(hashlib, blake_family)
        else:
            import pyblake2
            blake2 = getattr(pyblake2, blake_family)

        # Only pass digest_size when the name carries one, so that a bare
        # family name relies on the constructor's own default.
        kwargs = {} if digest_size is None else {'digest_size': digest_size}
        _blake2_hash_cache[algo] = lambda: blake2(**kwargs)

    return _blake2_hash_cache[algo]()


def _new_hashlib_hash(algo):
    """Initialize a digest object from hashlib.

    Handle the swh-specific names for the blake2-related algorithms

    """
    if algo.startswith('blake2'):
        return _new_blake2_hash(algo)
    else:
        return hashlib.new(algo)


def _new_git_hash(base_algo, git_type, length):
    """Initialize a digest object (as returned by python's hashlib) for the
    requested algorithm, and feed it with the header for a git object of the
    given type and length.

    The header for hashing a git object consists of:
     - The type of the object (encoded in ASCII)
     - One ASCII space (0x20)
     - The length of the object (decimal encoded in ASCII)
     - One NUL byte

    Args:
        base_algo (str from :const:`ALGORITHMS`): a hashlib-supported algorithm
        git_type: the type of the git object (supposedly one of 'blob',
                  'commit', 'tag', 'tree')
        length: the length of the git object you're encoding

    Returns:
        a hashutil.hash object

    """
    h = _new_hashlib_hash(base_algo)
    git_header = '%s %d\0' % (git_type, length)
    h.update(git_header.encode('ascii'))

    return h


def _new_hash(algo, length=None):
    """Initialize a digest object (as returned by python's hashlib) for
    the requested algorithm. See the constant ALGORITHMS for the list
    of supported algorithms. If a git-specific hashing algorithm is
    requested (e.g., "sha1_git"), the hashing object will be pre-fed
    with the needed header; for this to work, length must be given.

    Args:
        algo (str): a hashing algorithm (one of ALGORITHMS)
        length (int): the length of the hashed payload (needed for
          git-specific algorithms)

    Returns:
        a hashutil.hash object

    Raises:
        ValueError if algo is unknown, or length is missing for a git-specific
        hash.

    """
    if algo not in ALGORITHMS:
        raise ValueError(
            'Unexpected hashing algorithm %s, expected one of %s' %
            (algo, ', '.join(sorted(ALGORITHMS))))

    if algo.endswith('_git'):
        if length is None:
            raise ValueError('Missing length for git hashing algorithm')
        # 'sha1_git' -> 'sha1'
        base_algo = algo[:-4]
        return _new_git_hash(base_algo, 'blob', length)

    return _new_hashlib_hash(algo)


def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS,
              chunk_cb=None):
    """(Deprecated) cf. MultiHash.from_file

    Hash the contents of the given file object with the given algorithms.

    Args:
        fobj: a file-like object
        length: the length of the contents of the file-like object (for the
          git-specific algorithms)
        algorithms: the hashing algorithms to be used, as an iterable over
          strings
        chunk_cb (def): a callback called with each chunk once it has been
          hashed

    Returns: a dict mapping each algorithm to a bytes digest.

    Raises:
        ValueError if algorithms contains an unknown hash algorithm.

    """
    h = MultiHash(algorithms, length)
    for chunk in _iter_chunks(fobj):
        h.update(chunk)
        if chunk_cb:
            chunk_cb(chunk)

    return h.digest()


def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None,
              track_length=True):
    """(deprecated) cf. MultiHash.from_path

    Hash the contents of the file at the given path with the given
    algorithms.

    Args:
        path (str): the path of the file to hash
        algorithms (set): the hashing algorithms used
        chunk_cb (def): a callback called with each chunk once it has been
          hashed
        track_length (bool): also return the length of the file under the
          'length' key

    Returns: a dict mapping each algorithm to a bytes digest.

    Raises:
        ValueError if algorithms contains an unknown hash algorithm.
        OSError on file access error

    """
    if track_length:
        algorithms = set(['length']).union(algorithms)
    length = os.path.getsize(path)
    with open(path, 'rb') as fobj:
        return hash_file(fobj, length, algorithms, chunk_cb=chunk_cb)


def hash_data(data, algorithms=DEFAULT_ALGORITHMS):
    """(deprecated) cf. MultiHash.from_data

    Hash the given binary blob with the given algorithms.

    Args:
        data (bytes): raw content to hash
        algorithms (list): the hashing algorithms used

    Returns: a dict mapping each algorithm to a bytes digest

    Raises:
        TypeError if data does not support the buffer interface.
        ValueError if algorithms contains an unknown hash algorithm.

    """
    return MultiHash.from_data(data, hash_names=algorithms).digest()


def hash_git_data(data, git_type, base_algo='sha1'):
    """Hash the given data as a git object of type git_type.

    Args:
        data: a bytes object
        git_type: the git object type
        base_algo: the base hashing algorithm used (default: sha1)

    Returns: the digest of the git object, as bytes

    Raises:
        ValueError if the git_type is unexpected.

    """
    git_object_types = {'blob', 'tree', 'commit', 'tag', 'snapshot'}

    if git_type not in git_object_types:
        raise ValueError('Unexpected git object type %s, expected one of %s' %
                         (git_type, ', '.join(sorted(git_object_types))))

    h = _new_git_hash(base_algo, git_type, len(data))
    h.update(data)

    return h.digest()


@functools.lru_cache()
def hash_to_hex(hash):
    """Converts a hash (in hex or bytes form) to its hexadecimal ascii form

    Args:
      hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
        the hexadecimal form of the hash

    Returns:
      str: the hexadecimal form of the hash

    """
    if isinstance(hash, str):
        return hash
    return binascii.hexlify(hash).decode('ascii')


@functools.lru_cache()
def hash_to_bytehex(hash):
    """Converts a hash to its hexadecimal bytes representation

    Args:
      hash (bytes): a :class:`bytes` hash

    Returns:
      bytes: the hexadecimal form of the hash, as :class:`bytes`

    """
    return binascii.hexlify(hash)


@functools.lru_cache()
def hash_to_bytes(hash):
    """Converts a hash (in hex or bytes form) to its raw bytes form

    Args:
      hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
        the hexadecimal form of the hash

    Returns:
      bytes: the :class:`bytes` form of the hash

    """
    if isinstance(hash, bytes):
        return hash
    return bytes.fromhex(hash)


@functools.lru_cache()
def bytehex_to_hash(hex):
    """Converts a hexadecimal bytes representation of a hash to that hash

    Args:
      hex (bytes): a :class:`bytes` containing the hexadecimal form of the
        hash encoded in ascii

    Returns:
      bytes: the :class:`bytes` form of the hash

    """
    return hash_to_bytes(hex.decode())


# NOTE(review): the source patch continues with the changes made to
# swh/model/tests/test_hashutil.py (diff --git index d288149..92b3684,
# hunk @@ -1,438 +1,414 @@); they do not belong to this module.
top-level LICENSE file for more information import hashlib import io import os import tempfile import unittest from nose.tools import istest from unittest.mock import patch from swh.model import hashutil from swh.model.hashutil import MultiHash class BaseHashutil(unittest.TestCase): def setUp(self): # Reset function cache hashutil._blake2_hash_cache = {} self.data = b'1984\n' self.hex_checksums = { 'sha1': '62be35bf00ff0c624f4a621e2ea5595a049e0731', 'sha1_git': '568aaf43d83b2c3df8067f3bedbb97d83260be6d', 'sha256': '26602113b4b9afd9d55466b08580d3c2' '4a9b50ee5b5866c0d91fab0e65907311', 'blake2s256': '63cfb259e1fdb485bc5c55749697a6b21ef31fb7445f6c78a' 'c9422f9f2dc8906', } self.checksums = { type: bytes.fromhex(cksum) for type, cksum in self.hex_checksums.items() } self.bytehex_checksums = { type: hashutil.hash_to_bytehex(cksum) for type, cksum in self.checksums.items() } self.git_hex_checksums = { 'blob': self.hex_checksums['sha1_git'], 'tree': '5b2e883aa33d2efab98442693ea4dd5f1b8871b0', 'commit': '79e4093542e72f0fcb7cbd75cb7d270f9254aa8f', 'tag': 'd6bf62466f287b4d986c545890716ce058bddf67', } self.git_checksums = { type: bytes.fromhex(cksum) for type, cksum in self.git_hex_checksums.items() } class MultiHashTest(BaseHashutil): @istest def multi_hash_data(self): checksums = MultiHash.from_data(self.data).digest() self.assertEqual(checksums, self.checksums) self.assertFalse('length' in checksums) @istest def multi_hash_data_with_length(self): expected_checksums = self.checksums.copy() expected_checksums['length'] = len(self.data) algos = set(['length']).union(hashutil.DEFAULT_ALGORITHMS) checksums = MultiHash.from_data(self.data, hash_names=algos).digest() self.assertEqual(checksums, expected_checksums) self.assertTrue('length' in checksums) @istest def multi_hash_data_unknown_hash(self): with self.assertRaises(ValueError) as cm: MultiHash.from_data(self.data, ['unknown-hash']) self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) 
self.assertIn('unknown-hash', cm.exception.args[0]) @istest def multi_hash_file(self): fobj = io.BytesIO(self.data) checksums = MultiHash.from_file(fobj, length=len(self.data)).digest() self.assertEqual(checksums, self.checksums) @istest def multi_hash_file_hexdigest(self): fobj = io.BytesIO(self.data) length = len(self.data) checksums = MultiHash.from_file(fobj, length=length).hexdigest() self.assertEqual(checksums, self.hex_checksums) @istest def multi_hash_file_bytehexdigest(self): fobj = io.BytesIO(self.data) length = len(self.data) checksums = MultiHash.from_file(fobj, length=length).bytehexdigest() self.assertEqual(checksums, self.bytehex_checksums) @istest def multi_hash_file_missing_length(self): fobj = io.BytesIO(self.data) with self.assertRaises(ValueError) as cm: MultiHash.from_file(fobj, hash_names=['sha1_git']) self.assertIn('Missing length', cm.exception.args[0]) @istest def multi_hash_path(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.write(self.data) hashes = MultiHash.from_path(f.name).digest() os.remove(f.name) self.checksums['length'] = len(self.data) self.assertEquals(self.checksums, hashes) class Hashutil(BaseHashutil): @istest def hash_data(self): checksums = hashutil.hash_data(self.data) self.assertEqual(checksums, self.checksums) self.assertFalse('length' in checksums) @istest def hash_data_with_length(self): expected_checksums = self.checksums.copy() expected_checksums['length'] = len(self.data) algos = set(['length']).union(hashutil.DEFAULT_ALGORITHMS) checksums = hashutil.hash_data(self.data, algorithms=algos) self.assertEqual(checksums, expected_checksums) self.assertTrue('length' in checksums) @istest def hash_data_unknown_hash(self): with self.assertRaises(ValueError) as cm: hashutil.hash_data(self.data, ['unknown-hash']) self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) self.assertIn('unknown-hash', cm.exception.args[0]) - @istest - def hash_data_unknown_hash_format(self): - with 
self.assertRaises(ValueError) as cm: - hashutil.hash_data( - self.data, hashutil.DEFAULT_ALGORITHMS, - hash_format='unknown-format') - - self.assertIn('Unexpected hash format', cm.exception.args[0]) - self.assertIn('unknown-format', cm.exception.args[0]) - @istest def hash_git_data(self): checksums = { git_type: hashutil.hash_git_data(self.data, git_type) for git_type in self.git_checksums } self.assertEqual(checksums, self.git_checksums) @istest def hash_git_data_unknown_git_type(self): with self.assertRaises(ValueError) as cm: hashutil.hash_git_data(self.data, 'unknown-git-type') self.assertIn('Unexpected git object type', cm.exception.args[0]) self.assertIn('unknown-git-type', cm.exception.args[0]) @istest def hash_file(self): fobj = io.BytesIO(self.data) checksums = hashutil.hash_file(fobj, length=len(self.data)) self.assertEqual(checksums, self.checksums) - @istest - def hash_file_hexdigest(self): - fobj = io.BytesIO(self.data) - checksums = hashutil.hash_file( - fobj, length=len(self.data), hash_format='hex') - self.assertEqual(checksums, self.hex_checksums) - - @istest - def hash_file_bytehexdigest(self): - fobj = io.BytesIO(self.data) - checksums = hashutil.hash_file( - fobj, length=len(self.data), hash_format='bytehex') - self.assertEqual(checksums, self.bytehex_checksums) - @istest def hash_file_missing_length(self): fobj = io.BytesIO(self.data) with self.assertRaises(ValueError) as cm: hashutil.hash_file(fobj, algorithms=['sha1_git']) self.assertIn('Missing length', cm.exception.args[0]) @istest def hash_path(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.write(self.data) hashes = hashutil.hash_path(f.name) os.remove(f.name) self.checksums['length'] = len(self.data) self.assertEquals(self.checksums, hashes) @istest def hash_to_hex(self): for type in self.checksums: hex = self.hex_checksums[type] hash = self.checksums[type] self.assertEquals(hashutil.hash_to_hex(hex), hex) self.assertEquals(hashutil.hash_to_hex(hash), hex) @istest def 
hash_to_bytes(self): for type in self.checksums: hex = self.hex_checksums[type] hash = self.checksums[type] self.assertEquals(hashutil.hash_to_bytes(hex), hash) self.assertEquals(hashutil.hash_to_bytes(hash), hash) @istest def hash_to_bytehex(self): for algo in self.checksums: self.assertEqual(self.hex_checksums[algo].encode('ascii'), hashutil.hash_to_bytehex(self.checksums[algo])) @istest def bytehex_to_hash(self): for algo in self.checksums: self.assertEqual(self.checksums[algo], hashutil.bytehex_to_hash( self.hex_checksums[algo].encode())) @istest def new_hash_unsupported_hashing_algorithm(self): try: hashutil._new_hash('blake2:10') except ValueError as e: self.assertEquals(str(e), 'Unexpected hashing algorithm blake2:10, ' 'expected one of blake2b512, blake2s256, ' 'sha1, sha1_git, sha256') @patch('hashlib.new') @istest def new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new): if 'blake2b512' not in hashlib.algorithms_available: self.skipTest('blake2b512 not built-in') mock_hashlib_new.return_value = sentinel = object() h = hashutil._new_hash('blake2b512') self.assertIs(h, sentinel) mock_hashlib_new.assert_called_with('blake2b512') @patch('hashlib.new') @istest def new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new): if 'blake2s256' not in hashlib.algorithms_available: self.skipTest('blake2s256 not built-in') mock_hashlib_new.return_value = sentinel = object() h = hashutil._new_hash('blake2s256') self.assertIs(h, sentinel) mock_hashlib_new.assert_called_with('blake2s256') @istest def new_hash_blake2b_builtin(self): removed_hash = False try: if 'blake2b512' in hashlib.algorithms_available: removed_hash = True hashlib.algorithms_available.remove('blake2b512') if 'blake2b' not in hashlib.algorithms_available: self.skipTest('blake2b not built in') with patch('hashlib.blake2b') as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash('blake2b512') self.assertIs(h, sentinel) 
mock_blake2b.assert_called_with(digest_size=512//8) finally: if removed_hash: hashlib.algorithms_available.add('blake2b512') @istest def new_hash_blake2s_builtin(self): removed_hash = False try: if 'blake2s256' in hashlib.algorithms_available: removed_hash = True hashlib.algorithms_available.remove('blake2s256') if 'blake2s' not in hashlib.algorithms_available: self.skipTest('blake2s not built in') with patch('hashlib.blake2s') as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash('blake2s256') self.assertIs(h, sentinel) mock_blake2s.assert_called_with(digest_size=256//8) finally: if removed_hash: hashlib.algorithms_available.add('blake2s256') @istest def new_hash_blake2b_pyblake2(self): if 'blake2b512' in hashlib.algorithms_available: self.skipTest('blake2b512 built in') if 'blake2b' in hashlib.algorithms_available: self.skipTest('blake2b built in') with patch('pyblake2.blake2b') as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash('blake2b512') self.assertIs(h, sentinel) mock_blake2b.assert_called_with(digest_size=512//8) @istest def new_hash_blake2s_pyblake2(self): if 'blake2s256' in hashlib.algorithms_available: self.skipTest('blake2s256 built in') if 'blake2s' in hashlib.algorithms_available: self.skipTest('blake2s built in') with patch('pyblake2.blake2s') as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash('blake2s256') self.assertIs(h, sentinel) mock_blake2s.assert_called_with(digest_size=256//8) class HashlibGit(unittest.TestCase): def setUp(self): self.blob_data = b'42\n' self.tree_data = b''.join([b'40000 barfoo\0', bytes.fromhex('c3020f6bf135a38c6df' '3afeb5fb38232c5e07087'), b'100644 blah\0', bytes.fromhex('63756ef0df5e4f10b6efa' '33cfe5c758749615f20'), b'100644 hello\0', bytes.fromhex('907b308167f0880fb2a' '5c0e1614bb0c7620f9dc3')]) self.commit_data = """tree 1c61f7259dcb770f46b194d941df4f08ff0a3970 author Antoine R. 
Dumont (@ardumont) 1444054085 +0200 committer Antoine R. Dumont (@ardumont) 1444054085 +0200 initial """.encode('utf-8') # NOQA self.tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241 type commit tag 0.0.1 tagger Antoine R. Dumont (@ardumont) 1444225145 +0200 blah """.encode('utf-8') # NOQA self.checksums = { 'blob_sha1_git': bytes.fromhex('d81cc0710eb6cf9efd5b920a8453e1' 'e07157b6cd'), 'tree_sha1_git': bytes.fromhex('ac212302c45eada382b27bfda795db' '121dacdb1c'), 'commit_sha1_git': bytes.fromhex('e960570b2e6e2798fa4cfb9af2c399' 'd629189653'), 'tag_sha1_git': bytes.fromhex('bc2b99ba469987bcf1272c189ed534' 'e9e959f120'), } @istest def unknown_header_type(self): with self.assertRaises(ValueError) as cm: hashutil.hash_git_data(b'any-data', 'some-unknown-type') self.assertIn('Unexpected git object type', cm.exception.args[0]) @istest def hashdata_content(self): # when actual_hash = hashutil.hash_git_data(self.blob_data, git_type='blob') # then self.assertEqual(actual_hash, self.checksums['blob_sha1_git']) @istest def hashdata_tree(self): # when actual_hash = hashutil.hash_git_data(self.tree_data, git_type='tree') # then self.assertEqual(actual_hash, self.checksums['tree_sha1_git']) @istest def hashdata_revision(self): # when actual_hash = hashutil.hash_git_data(self.commit_data, git_type='commit') # then self.assertEqual(actual_hash, self.checksums['commit_sha1_git']) @istest def hashdata_tag(self): # when actual_hash = hashutil.hash_git_data(self.tag_data, git_type='tag') # then self.assertEqual(actual_hash, self.checksums['tag_sha1_git'])