diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -8,9 +8,10 @@ import io import os import tempfile -import unittest from unittest.mock import patch +import pytest + from swh.model import hashutil from swh.model.hashutil import MultiHash @@ -26,13 +27,16 @@ hashutil._blake2_hash_cache.clear() -class BaseHashutil(unittest.TestCase): - def setUp(self): - # Reset function cache - hashutil._blake2_hash_cache = {} +@pytest.fixture +def hash_test_data(): + + # Reset function cache + hashutil._blake2_hash_cache = {} + + class HashTestData: - self.data = b"1984\n" - self.hex_checksums = { + data = b"1984\n" + hex_checksums = { "sha1": "62be35bf00ff0c624f4a621e2ea5595a049e0731", "sha1_git": "568aaf43d83b2c3df8067f3bedbb97d83260be6d", "sha256": "26602113b4b9afd9d55466b08580d3c2" @@ -41,238 +45,252 @@ "c9422f9f2dc8906", } - self.checksums = { - type: bytes.fromhex(cksum) for type, cksum in self.hex_checksums.items() + checksums = { + type: bytes.fromhex(cksum) for type, cksum in hex_checksums.items() } - self.bytehex_checksums = { - type: hashutil.hash_to_bytehex(cksum) - for type, cksum in self.checksums.items() + bytehex_checksums = { + type: hashutil.hash_to_bytehex(cksum) for type, cksum in checksums.items() } - self.git_hex_checksums = { - "blob": self.hex_checksums["sha1_git"], + git_hex_checksums = { + "blob": hex_checksums["sha1_git"], "tree": "5b2e883aa33d2efab98442693ea4dd5f1b8871b0", "commit": "79e4093542e72f0fcb7cbd75cb7d270f9254aa8f", "tag": "d6bf62466f287b4d986c545890716ce058bddf67", } - self.git_checksums = { - type: bytes.fromhex(cksum) for type, cksum in self.git_hex_checksums.items() + git_checksums = { + type: bytes.fromhex(cksum) for type, cksum in git_hex_checksums.items() } + return HashTestData -class MultiHashTest(BaseHashutil): - def test_multi_hash_data(self): - checksums = MultiHash.from_data(self.data).digest() - self.assertEqual(checksums, self.checksums) - self.assertFalse("length" in checksums) - def test_multi_hash_data_with_length(self): - expected_checksums = self.checksums.copy() - expected_checksums["length"] = len(self.data) +def test_multi_hash_data(hash_test_data): + checksums = MultiHash.from_data(hash_test_data.data).digest() + assert checksums == hash_test_data.checksums + assert "length" not in checksums - algos = set(["length"]).union(hashutil.DEFAULT_ALGORITHMS) - checksums = MultiHash.from_data(self.data, hash_names=algos).digest() - self.assertEqual(checksums, expected_checksums) - self.assertTrue("length" in checksums) +def test_multi_hash_data_with_length(hash_test_data): + expected_checksums = hash_test_data.checksums.copy() + expected_checksums["length"] = len(hash_test_data.data) - def test_multi_hash_data_unknown_hash(self): - with self.assertRaises(ValueError) as cm: - MultiHash.from_data(self.data, ["unknown-hash"]) + algos = set(["length"]).union(hashutil.DEFAULT_ALGORITHMS) + checksums = MultiHash.from_data(hash_test_data.data, hash_names=algos).digest() - self.assertIn("Unexpected hashing algorithm", cm.exception.args[0]) - self.assertIn("unknown-hash", cm.exception.args[0]) + assert checksums == expected_checksums + assert "length" in checksums - def test_multi_hash_file(self): - fobj = io.BytesIO(self.data) - checksums = MultiHash.from_file(fobj, length=len(self.data)).digest() - self.assertEqual(checksums, self.checksums) +def test_multi_hash_data_unknown_hash(hash_test_data): + with pytest.raises(ValueError, match="Unexpected hashing algorithm.*unknown-hash"): + MultiHash.from_data(hash_test_data.data, ["unknown-hash"]) - def test_multi_hash_file_hexdigest(self): - fobj = io.BytesIO(self.data) - length = len(self.data) - checksums = MultiHash.from_file(fobj, length=length).hexdigest() - self.assertEqual(checksums, self.hex_checksums) - def test_multi_hash_file_bytehexdigest(self): - fobj = io.BytesIO(self.data) - length = len(self.data) - checksums = MultiHash.from_file(fobj, length=length).bytehexdigest() - self.assertEqual(checksums, self.bytehex_checksums) +def test_multi_hash_file(hash_test_data): + fobj = io.BytesIO(hash_test_data.data) - def test_multi_hash_file_missing_length(self): - fobj = io.BytesIO(self.data) - with self.assertRaises(ValueError) as cm: - MultiHash.from_file(fobj, hash_names=["sha1_git"]) + checksums = MultiHash.from_file(fobj, length=len(hash_test_data.data)).digest() + assert checksums == hash_test_data.checksums - self.assertIn("Missing length", cm.exception.args[0]) - def test_multi_hash_path(self): - with tempfile.NamedTemporaryFile(delete=False) as f: - f.write(self.data) +def test_multi_hash_file_hexdigest(hash_test_data): + fobj = io.BytesIO(hash_test_data.data) + length = len(hash_test_data.data) + checksums = MultiHash.from_file(fobj, length=length).hexdigest() + assert checksums == hash_test_data.hex_checksums - hashes = MultiHash.from_path(f.name).digest() - os.remove(f.name) - self.assertEqual(self.checksums, hashes) +def test_multi_hash_file_bytehexdigest(hash_test_data): + fobj = io.BytesIO(hash_test_data.data) + length = len(hash_test_data.data) + checksums = MultiHash.from_file(fobj, length=length).bytehexdigest() + assert checksums == hash_test_data.bytehex_checksums -class Hashutil(BaseHashutil): - def test_hash_git_data(self): - checksums = { - git_type: hashutil.hash_git_data(self.data, git_type) - for git_type in self.git_checksums - } +def test_multi_hash_file_missing_length(hash_test_data): + fobj = io.BytesIO(hash_test_data.data) + with pytest.raises(ValueError, match="Missing length"): + MultiHash.from_file(fobj, hash_names=["sha1_git"]) - self.assertEqual(checksums, self.git_checksums) - - def test_hash_git_data_unknown_git_type(self): - with self.assertRaises(ValueError) as cm: - hashutil.hash_git_data(self.data, "unknown-git-type") - - self.assertIn("Unexpected git object type", cm.exception.args[0]) - self.assertIn("unknown-git-type", cm.exception.args[0]) - - def test_hash_to_hex(self): - for type in self.checksums: - hex = self.hex_checksums[type] - hash = self.checksums[type] - self.assertEqual(hashutil.hash_to_hex(hex), hex) - self.assertEqual(hashutil.hash_to_hex(hash), hex) - - def test_hash_to_bytes(self): - for type in self.checksums: - hex = self.hex_checksums[type] - hash = self.checksums[type] - self.assertEqual(hashutil.hash_to_bytes(hex), hash) - self.assertEqual(hashutil.hash_to_bytes(hash), hash) - - def test_hash_to_bytehex(self): - for algo in self.checksums: - self.assertEqual( - self.hex_checksums[algo].encode("ascii"), - hashutil.hash_to_bytehex(self.checksums[algo]), - ) - - def test_bytehex_to_hash(self): - for algo in self.checksums: - self.assertEqual( - self.checksums[algo], - hashutil.bytehex_to_hash(self.hex_checksums[algo].encode()), - ) - - def test_new_hash_unsupported_hashing_algorithm(self): - try: - hashutil._new_hash("blake2:10") - except ValueError as e: - self.assertEqual( - str(e), - "Unexpected hashing algorithm blake2:10, " - "expected one of blake2b512, blake2s256, " - "sha1, sha1_git, sha256", - ) - - @patch("hashlib.new") - def test_new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new): - if "blake2b512" not in hashlib.algorithms_available: - self.skipTest("blake2b512 not built-in") - mock_hashlib_new.return_value = sentinel = object() - h = hashutil._new_hash("blake2b512") +def test_multi_hash_path(hash_test_data): + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(hash_test_data.data) - self.assertIs(h, sentinel) - mock_hashlib_new.assert_called_with("blake2b512") + hashes = MultiHash.from_path(f.name).digest() + os.remove(f.name) - @patch("hashlib.new") - def test_new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new): - if "blake2s256" not in hashlib.algorithms_available: - self.skipTest("blake2s256 not built-in") - mock_hashlib_new.return_value = sentinel = object() + assert hash_test_data.checksums == hashes - h = hashutil._new_hash("blake2s256") - self.assertIs(h, sentinel) - mock_hashlib_new.assert_called_with("blake2s256") +def test_hash_git_data(hash_test_data): + checksums = { + git_type: hashutil.hash_git_data(hash_test_data.data, git_type) + for git_type in hash_test_data.git_checksums + } + + assert checksums == hash_test_data.git_checksums - def test_new_hash_blake2b_builtin(self): - removed_hash = False - try: - if "blake2b512" in hashlib.algorithms_available: - removed_hash = True - hashlib.algorithms_available.remove("blake2b512") - if "blake2b" not in hashlib.algorithms_available: - self.skipTest("blake2b not built in") +def test_hash_git_data_unknown_git_type(hash_test_data): + with pytest.raises( + ValueError, match="Unexpected git object type.*unknown-git-type" + ): + hashutil.hash_git_data(hash_test_data.data, "unknown-git-type") - with patch_blake2("hashlib.blake2b") as mock_blake2b: - mock_blake2b.return_value = sentinel = object() - h = hashutil._new_hash("blake2b512") +def test_hash_to_hex(hash_test_data): + for type in hash_test_data.checksums: + hex = hash_test_data.hex_checksums[type] + hash = hash_test_data.checksums[type] + assert hashutil.hash_to_hex(hex) == hex + assert hashutil.hash_to_hex(hash) == hex - self.assertIs(h, sentinel) - mock_blake2b.assert_called_with(digest_size=512 // 8) - finally: - if removed_hash: - hashlib.algorithms_available.add("blake2b512") - def test_new_hash_blake2s_builtin(self): - removed_hash = False +def test_hash_to_bytes(hash_test_data): + for type in hash_test_data.checksums: + hex = hash_test_data.hex_checksums[type] + hash = hash_test_data.checksums[type] + assert hashutil.hash_to_bytes(hex) == hash + assert hashutil.hash_to_bytes(hash) == hash - try: - if "blake2s256" in hashlib.algorithms_available: - removed_hash = True - hashlib.algorithms_available.remove("blake2s256") - if "blake2s" not in hashlib.algorithms_available: - self.skipTest("blake2s not built in") - with patch_blake2("hashlib.blake2s") as mock_blake2s: - mock_blake2s.return_value = sentinel = object() +def test_hash_to_bytehex(hash_test_data): + for algo in hash_test_data.checksums: + hex_checksum = hash_test_data.hex_checksums[algo].encode("ascii") + assert hex_checksum == hashutil.hash_to_bytehex(hash_test_data.checksums[algo]) + + +def test_bytehex_to_hash(hash_test_data): + for algo in hash_test_data.checksums: + assert hash_test_data.checksums[algo] == hashutil.bytehex_to_hash( + hash_test_data.hex_checksums[algo].encode() + ) - h = hashutil._new_hash("blake2s256") - self.assertIs(h, sentinel) - mock_blake2s.assert_called_with(digest_size=256 // 8) - finally: - if removed_hash: - hashlib.algorithms_available.add("blake2s256") +def test_new_hash_unsupported_hashing_algorithm(): + expected_message = ( + "Unexpected hashing algorithm blake2:10, " + "expected one of blake2b512, blake2s256, " + "sha1, sha1_git, sha256" + ) + with pytest.raises(ValueError, match=expected_message): + hashutil._new_hash("blake2:10") - def test_new_hash_blake2b_pyblake2(self): + +@pytest.mark.skipif( + "blake2b512" not in hashlib.algorithms_available, reason="blake2b512 not built-in" +) +@patch("hashlib.new") +def test_new_hash_blake2b_blake2b512_builtin(mock_hashlib_new): + mock_hashlib_new.return_value = sentinel = object() + + h = hashutil._new_hash("blake2b512") + + assert h is sentinel + mock_hashlib_new.assert_called_with("blake2b512") + + +@pytest.mark.skipif( + "blake2s256" not in hashlib.algorithms_available, reason="blake2s256 not built-in" +) +@patch("hashlib.new") +def test_new_hash_blake2s_blake2s256_builtin(mock_hashlib_new): + mock_hashlib_new.return_value = sentinel = object() + + h = hashutil._new_hash("blake2s256") + + assert h is sentinel + mock_hashlib_new.assert_called_with("blake2s256") + + +@pytest.mark.skipif( + "blake2b" not in hashlib.algorithms_available, reason="blake2b not built-in" +) +def test_new_hash_blake2b_builtin(hash_test_data): + removed_hash = False + + try: if "blake2b512" in hashlib.algorithms_available: - self.skipTest("blake2b512 built in") - if "blake2b" in hashlib.algorithms_available: - self.skipTest("blake2b built in") + removed_hash = True + hashlib.algorithms_available.remove("blake2b512") - with patch_blake2("pyblake2.blake2b") as mock_blake2b: + with patch_blake2("hashlib.blake2b") as mock_blake2b: mock_blake2b.return_value = sentinel = object() h = hashutil._new_hash("blake2b512") - self.assertIs(h, sentinel) + assert h is sentinel mock_blake2b.assert_called_with(digest_size=512 // 8) + finally: + if removed_hash: + hashlib.algorithms_available.add("blake2b512") + + +@pytest.mark.skipif( + "blake2s" not in hashlib.algorithms_available, reason="blake2s not built-in" +) +def test_new_hash_blake2s_builtin(): + removed_hash = False - def test_new_hash_blake2s_pyblake2(self): + try: if "blake2s256" in hashlib.algorithms_available: - self.skipTest("blake2s256 built in") - if "blake2s" in hashlib.algorithms_available: - self.skipTest("blake2s built in") + removed_hash = True + hashlib.algorithms_available.remove("blake2s256") - with patch_blake2("pyblake2.blake2s") as mock_blake2s: + with patch_blake2("hashlib.blake2s") as mock_blake2s: mock_blake2s.return_value = sentinel = object() h = hashutil._new_hash("blake2s256") - self.assertIs(h, sentinel) + assert h is sentinel mock_blake2s.assert_called_with(digest_size=256 // 8) + finally: + if removed_hash: + hashlib.algorithms_available.add("blake2s256") + + +@pytest.mark.skipif( + "blake2b512" in hashlib.algorithms_available, reason="blake2b512 built-in" +) +@pytest.mark.skipif( + "blake2b" in hashlib.algorithms_available, reason="blake2b built-in" +) +def test_new_hash_blake2b_pyblake2(): + with patch_blake2("pyblake2.blake2b") as mock_blake2b: + mock_blake2b.return_value = sentinel = object() + + h = hashutil._new_hash("blake2b512") + + assert h is sentinel + mock_blake2b.assert_called_with(digest_size=512 // 8) + +@pytest.mark.skipif( + "blake2s256" in hashlib.algorithms_available, reason="blake2s256 built-in" +) +@pytest.mark.skipif( + "blake2s" in hashlib.algorithms_available, reason="blake2s built-in" +) +def test_new_hash_blake2s_pyblake2(): + with patch_blake2("pyblake2.blake2s") as mock_blake2s: + mock_blake2s.return_value = sentinel = object() + + h = hashutil._new_hash("blake2s256") + + assert h is sentinel + mock_blake2s.assert_called_with(digest_size=256 // 8) -class HashlibGit(unittest.TestCase): - def setUp(self): - self.blob_data = b"42\n" - self.tree_data = b"".join( +@pytest.fixture +def hashgit_test_data(): + class HashGitTestData: + blob_data = b"42\n" + + tree_data = b"".join( [ b"40000 barfoo\0", bytes.fromhex("c3020f6bf135a38c6df" "3afeb5fb38232c5e07087"), @@ -283,14 +301,15 @@ ] ) - self.commit_data = b"""\ + commit_data = b"""\ tree 1c61f7259dcb770f46b194d941df4f08ff0a3970 author Antoine R. Dumont (@ardumont) 1444054085 +0200 committer Antoine R. Dumont (@ardumont) 1444054085 +0200 initial """ # noqa - self.tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241 + + tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241 type commit tag 0.0.1 tagger Antoine R. Dumont (@ardumont) 1444225145 +0200 @@ -300,7 +319,7 @@ "utf-8" ) # NOQA - self.checksums = { + checksums = { "blob_sha1_git": bytes.fromhex( "d81cc0710eb6cf9efd5b920a8453e1" "e07157b6cd" ), @@ -315,36 +334,43 @@ ), } - def test_unknown_header_type(self): - with self.assertRaises(ValueError) as cm: - hashutil.hash_git_data(b"any-data", "some-unknown-type") + return HashGitTestData + + +def test_unknown_header_type(): + with pytest.raises(ValueError, match="Unexpected git object type"): + hashutil.hash_git_data(b"any-data", "some-unknown-type") + + +def test_hashdata_content(hashgit_test_data): + # when + actual_hash = hashutil.hash_git_data(hashgit_test_data.blob_data, git_type="blob") + + # then + assert actual_hash == hashgit_test_data.checksums["blob_sha1_git"] - self.assertIn("Unexpected git object type", cm.exception.args[0]) - def test_hashdata_content(self): - # when - actual_hash = hashutil.hash_git_data(self.blob_data, git_type="blob") +def test_hashdata_tree(hashgit_test_data): + # when + actual_hash = hashutil.hash_git_data(hashgit_test_data.tree_data, git_type="tree") - # then - self.assertEqual(actual_hash, self.checksums["blob_sha1_git"]) + # then + assert actual_hash == hashgit_test_data.checksums["tree_sha1_git"] - def test_hashdata_tree(self): - # when - actual_hash = hashutil.hash_git_data(self.tree_data, git_type="tree") - # then - self.assertEqual(actual_hash, self.checksums["tree_sha1_git"]) +def test_hashdata_revision(hashgit_test_data): + # when + actual_hash = hashutil.hash_git_data( + hashgit_test_data.commit_data, git_type="commit" + ) - def test_hashdata_revision(self): - # when - actual_hash = hashutil.hash_git_data(self.commit_data, git_type="commit") + # then + assert actual_hash == hashgit_test_data.checksums["commit_sha1_git"] - # then - self.assertEqual(actual_hash, self.checksums["commit_sha1_git"]) - def test_hashdata_tag(self): - # when - actual_hash = hashutil.hash_git_data(self.tag_data, git_type="tag") +def test_hashdata_tag(hashgit_test_data): + # when + actual_hash = hashutil.hash_git_data(hashgit_test_data.tag_data, git_type="tag") - # then - self.assertEqual(actual_hash, self.checksums["tag_sha1_git"]) + # then + assert actual_hash == hashgit_test_data.checksums["tag_sha1_git"]