diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -48,21 +48,6 @@ f.write(chunk) hashes = h.hexdigest() # returns a dict of {hash_algo_name: hash_in_hex} - Note: Prior to this, we would have to use chunk_cb (cf. hash_file, - hash_path) - - -This module also defines the following (deprecated) hashing functions: - -- hash_file: Hash the contents of the given file object with the given - algorithms (defaulting to DEFAULT_ALGORITHMS if none provided). - -- hash_data: Hash the given binary blob with the given algorithms - (defaulting to DEFAULT_ALGORITHMS if none provided). - -- hash_path: Hash the contents of the file at the given path with the - given algorithms (defaulting to DEFAULT_ALGORITHMS if none - provided). """ @@ -290,84 +275,6 @@ return _new_hashlib_hash(algo) -def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, - chunk_cb=None): - """(Deprecated) cf. MultiHash.from_file - - Hash the contents of the given file object with the given algorithms. - - Args: - fobj: a file-like object - length (int): the length of the contents of the file-like - object (for the git-specific algorithms) - algorithms (set): the hashing algorithms to be used, as an - iterable over strings - chunk_cb (fun): a callback function taking a chunk of data as - parameter - - Returns: - a dict mapping each algorithm to a digest (bytes by default). - - Raises: - ValueError if algorithms contains an unknown hash algorithm. - - """ - h = MultiHash(algorithms, length) - while True: - chunk = fobj.read(HASH_BLOCK_SIZE) - if not chunk: - break - h.update(chunk) - if chunk_cb: - chunk_cb(chunk) - - return h.digest() - - -def hash_path(path, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None): - """(deprecated) cf. MultiHash.from_path - - Hash the contents of the file at the given path with the given - algorithms. - - Args: - path (str): the path of the file to hash - algorithms (set): the hashing algorithms used - chunk_cb (fun): a callback function taking a chunk of data as parameter - - Returns: a dict mapping each algorithm to a bytes digest. - - Raises: - ValueError if algorithms contains an unknown hash algorithm. - OSError on file access error - - """ - length = os.path.getsize(path) - with open(path, 'rb') as fobj: - hashes = hash_file(fobj, length, algorithms, chunk_cb=chunk_cb) - hashes['length'] = length - return hashes - - -def hash_data(data, algorithms=DEFAULT_ALGORITHMS): - """(deprecated) cf. MultiHash.from_data - - Hash the given binary blob with the given algorithms. - - Args: - data (bytes): raw content to hash - algorithms (set): the hashing algorithms used - - Returns: a dict mapping each algorithm to a bytes digest - - Raises: - TypeError if data does not support the buffer interface. - ValueError if algorithms contains an unknown hash algorithm. - - """ - return MultiHash.from_data(data, hash_names=algorithms).digest() - - def hash_git_data(data, git_type, base_algo='sha1'): """Hash the given data as a git object of type git_type. diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -111,27 +111,6 @@ class Hashutil(BaseHashutil): - def test_hash_data(self): - checksums = hashutil.hash_data(self.data) - self.assertEqual(checksums, self.checksums) - self.assertFalse('length' in checksums) - - def test_hash_data_with_length(self): - expected_checksums = self.checksums.copy() - expected_checksums['length'] = len(self.data) - - algos = set(['length']).union(hashutil.DEFAULT_ALGORITHMS) - checksums = hashutil.hash_data(self.data, algorithms=algos) - - self.assertEqual(checksums, expected_checksums) - self.assertTrue('length' in checksums) - - def test_hash_data_unknown_hash(self): - with self.assertRaises(ValueError) as cm: - hashutil.hash_data(self.data, ['unknown-hash']) - - self.assertIn('Unexpected hashing algorithm', cm.exception.args[0]) - self.assertIn('unknown-hash', cm.exception.args[0]) def test_hash_git_data(self): checksums = { @@ -148,30 +127,6 @@ self.assertIn('Unexpected git object type', cm.exception.args[0]) self.assertIn('unknown-git-type', cm.exception.args[0]) - def test_hash_file(self): - fobj = io.BytesIO(self.data) - - checksums = hashutil.hash_file(fobj, length=len(self.data)) - self.assertEqual(checksums, self.checksums) - - def test_hash_file_missing_length(self): - fobj = io.BytesIO(self.data) - - with self.assertRaises(ValueError) as cm: - hashutil.hash_file(fobj, algorithms=['sha1_git']) - - self.assertIn('Missing length', cm.exception.args[0]) - - def test_hash_path(self): - with tempfile.NamedTemporaryFile(delete=False) as f: - f.write(self.data) - - hashes = hashutil.hash_path(f.name) - os.remove(f.name) - - self.checksums['length'] = len(self.data) - self.assertEqual(self.checksums, hashes) - def test_hash_to_hex(self): for type in self.checksums: hex = self.hex_checksums[type] diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -103,7 +103,8 @@ tzinfo=datetime.timezone.utc), } - self.content_id = hashutil.hash_data(self.content['data']) + self.content_id = hashutil.MultiHash.from_data( + self.content['data']).digest() def test_content_identifier(self): self.assertEqual(identifiers.content_identifier(self.content), diff --git a/swh/model/tests/test_validators.py b/swh/model/tests/test_validators.py --- a/swh/model/tests/test_validators.py +++ b/swh/model/tests/test_validators.py @@ -9,6 +9,10 @@ from swh.model import exceptions, hashutil, validators +def hash_data(raw_content): + return hashutil.MultiHash.from_data(raw_content).digest() + + class TestValidators(unittest.TestCase): def setUp(self): self.valid_visible_content = { @@ -20,7 +24,7 @@ } self.valid_visible_content.update( - hashutil.hash_data(self.valid_visible_content['data'])) + hash_data(self.valid_visible_content['data'])) self.valid_absent_content = { 'status': 'absent', @@ -34,7 +38,7 @@ self.invalid_content_hash_mismatch = self.valid_visible_content.copy() self.invalid_content_hash_mismatch.update( - hashutil.hash_data(b"this is not the data you're looking for")) + hash_data(b"this is not the data you're looking for")) def test_validate_content(self): self.assertTrue(