diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py --- a/swh/storage/pytest_plugin.py +++ b/swh/storage/pytest_plugin.py @@ -211,9 +211,8 @@ """ return { - "content": [data.cont, data.cont2], - "content_no_data": [data.cont3], - "skipped_content": [data.skipped_cont, data.skipped_cont2], + "content": [data.content, data.content2, data.content3], + "skipped_content": [data.skipped_content, data.skipped_content2], "person": [data.person], "directory": [data.dir2, data.dir, data.dir3, data.dir4], "revision": [data.revision, data.revision2, data.revision3, data.revision4], @@ -240,7 +239,6 @@ # changed but not the endpoints yet) OBJECT_FACTORY = { "content": Content.from_dict, - "content_no_data": Content.from_dict, "skipped_content": SkippedContent.from_dict, "person": Person.from_dict, "directory": Directory.from_dict, diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2019 The Software Heritage developers +# Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -11,11 +11,13 @@ from swh.model import from_disk from swh.model.identifiers import parse_swhid from swh.model.model import ( + Content, MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - RawExtrinsicMetadata, MetadataTargetType, + RawExtrinsicMetadata, + SkippedContent, ) @@ -33,96 +35,92 @@ data = StorageData() -cont = { - "data": b"42\n", - "length": 3, - "sha1": hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"), - "sha1_git": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), - "sha256": hash_to_bytes( +content = Content( + data=b"42\n", + length=3, + sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"), + sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), + sha256=hash_to_bytes( "673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a" ), - "blake2s256": hash_to_bytes( + blake2s256=hash_to_bytes( "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d" ), - "status": "visible", -} + status="visible", +) -cont2 = { - "data": b"4242\n", - "length": 5, - "sha1": hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"), - "sha1_git": hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"), - "sha256": hash_to_bytes( +content2 = Content( + data=b"4242\n", + length=5, + sha1=hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"), + sha1_git=hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"), + sha256=hash_to_bytes( "859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd" ), - "blake2s256": hash_to_bytes( + blake2s256=hash_to_bytes( "849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d" ), - "status": "visible", -} + status="visible", +) -cont3 = { - "data": b"424242\n", - "length": 7, - "sha1": hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"), - "sha1_git": hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"), - "sha256": hash_to_bytes( +content3 = Content( + data=b"424242\n", + length=7, + sha1=hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"), + sha1_git=hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"), + sha256=hash_to_bytes( "92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36" ), - "blake2s256": hash_to_bytes( + blake2s256=hash_to_bytes( "76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11" ), - "status": "visible", - "ctime": "2019-12-01 00:00:00Z", -} - -contents = (cont, cont2, cont3) + status="visible", + ctime=datetime.datetime(2019, 12, 1, tzinfo=datetime.timezone.utc), +) -missing_cont = { - "length": 8, - "sha1": hash_to_bytes("f9c24e2abb82063a3ba2c44efd2d3c797f28ac90"), - "sha1_git": hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8919"), - "sha256": hash_to_bytes( +missing_content = Content( + data=b"something missing", + length=8, + sha1=hash_to_bytes("f9c24e2abb82063a3ba2c44efd2d3c797f28ac90"), + sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8919"), + sha256=hash_to_bytes( "6bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a" ), - "blake2s256": hash_to_bytes( + blake2s256=hash_to_bytes( "306856b8fd879edb7b6f1aeaaf8db9bbecc993cd7f776c333ac3a782fa5c6eba" ), - "reason": "Content too long", - "status": "absent", -} + status="visible", +) -skipped_cont = { - "length": 1024 * 1024 * 200, - "sha1_git": hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"), - "sha1": hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"), - "sha256": hash_to_bytes( +skipped_content = SkippedContent( + length=1024 * 1024 * 200, + sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"), + sha1=hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"), + sha256=hash_to_bytes( "7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a" ), - "blake2s256": hash_to_bytes( + blake2s256=hash_to_bytes( "ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b" ), - "reason": "Content too long", - "status": "absent", - "origin": "file:///dev/zero", -} + reason="Content too long", + status="absent", + origin="file:///dev/zero", +) -skipped_cont2 = { - "length": 1024 * 1024 * 300, - "sha1_git": hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"), - "sha1": hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"), - "sha256": hash_to_bytes( +skipped_content2 = SkippedContent( + length=1024 * 1024 * 300, + sha1_git=hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"), + sha1=hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"), + sha256=hash_to_bytes( "8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a" ), - "blake2s256": hash_to_bytes( + blake2s256=hash_to_bytes( "9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b" ), - "reason": "Content too long", - "status": "absent", -} - -skipped_contents = (skipped_cont, skipped_cont2) + reason="Content too long", + status="absent", +) dir = { @@ -131,7 +129,7 @@ { "name": b"foo", "type": "file", - "target": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), # cont + "target": content.sha1_git, "perms": from_disk.DentryPerms.content, }, { @@ -149,9 +147,7 @@ { "name": b"oof", "type": "file", - "target": hash_to_bytes( # cont2 - "36fade77193cb6d2bd826161a0979d64c28ab4fa" - ), + "target": content2.sha1_git, "perms": from_disk.DentryPerms.content, }, ), @@ -163,7 +159,7 @@ { "name": b"foo", "type": "file", - "target": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), # cont + "target": content.sha1_git, "perms": from_disk.DentryPerms.content, }, { @@ -505,7 +501,7 @@ content_metadata = RawExtrinsicMetadata( type=MetadataTargetType.CONTENT, - id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"), + id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), origin=origin["url"], discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc @@ -517,7 +513,7 @@ ) content_metadata2 = RawExtrinsicMetadata( type=MetadataTargetType.CONTENT, - id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"), + id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), origin=origin2["url"], discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc @@ -529,7 +525,7 @@ ) content_metadata3 = RawExtrinsicMetadata( type=MetadataTargetType.CONTENT, - id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"), + id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), @@ -585,13 +581,3 @@ "email": b"john.doe@institute.org", "fullname": b"John Doe ", } - -objects = { - "content": contents, - "skipped_content": skipped_contents, - "directory": directories, - "revision": revisions, - "origin": origins, - "release": releases, - "snapshot": snapshots, -} diff --git a/swh/storage/tests/test_buffer.py b/swh/storage/tests/test_buffer.py --- a/swh/storage/tests/test_buffer.py +++ b/swh/storage/tests/test_buffer.py @@ -16,11 +16,11 @@ def test_buffering_proxy_storage_content_threshold_not_hit(sample_data_model): - contents = sample_data_model["content"] + contents = sample_data_model["content"][:2] contents_dict = [c.to_dict() for c in contents] storage = get_storage_with_buffer_config(min_batch_size={"content": 10,}) - s = storage.content_add([contents[0], contents[1]]) + s = storage.content_add(contents) assert s == {} # contents have not been written to storage @@ -57,7 +57,7 @@ def test_buffering_proxy_storage_content_deduplicate(sample_data_model): - contents = sample_data_model["content"] + contents = sample_data_model["content"][:2] storage = get_storage_with_buffer_config(min_batch_size={"content": 2,}) s = storage.content_add([contents[0], contents[0]]) @@ -80,7 +80,7 @@ def test_buffering_proxy_storage_content_threshold_bytes_hit(sample_data_model): - contents = sample_data_model["content"] + contents = sample_data_model["content"][:2] content_bytes_min_batch_size = 2 storage = get_storage_with_buffer_config( min_batch_size={"content": 10, "content_bytes": content_bytes_min_batch_size,} diff --git a/swh/storage/tests/test_pytest_plugin.py b/swh/storage/tests/test_pytest_plugin.py --- a/swh/storage/tests/test_pytest_plugin.py +++ b/swh/storage/tests/test_pytest_plugin.py @@ -15,7 +15,6 @@ assert set(sample_data.keys()) == set( [ "content", - "content_no_data", "skipped_content", "person", "directory", @@ -32,6 +31,8 @@ ) for object_type, objs in sample_data.items(): if object_type in [ + "content", + "skipped_content", "fetcher", "authority", "origin_metadata", @@ -49,7 +50,6 @@ assert set(sample_data_model.keys()) == set( [ "content", - "content_no_data", "skipped_content", "person", "directory", diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import attr + from unittest.mock import call import psycopg2 @@ -117,13 +119,14 @@ """Standard content_add_metadata works as before """ - sample_content = sample_data_model["content_no_data"][0] + sample_content = sample_data_model["content"][0] + content = attr.evolve(sample_content, data=None) - pk = sample_content.sha1 + pk = content.sha1 content_metadata = swh_storage.content_get_metadata([pk]) assert not content_metadata[pk] - s = swh_storage.content_add_metadata([sample_content]) + s = swh_storage.content_add_metadata([content]) assert s == { "content:add": 1, } @@ -151,13 +154,14 @@ {"content:add": 1}, ] - sample_content = sample_data_model["content_no_data"][0] + sample_content = sample_data_model["content"][0] + content = attr.evolve(sample_content, data=None) - s = swh_storage.content_add_metadata([sample_content]) + s = swh_storage.content_add_metadata([content]) assert s == {"content:add": 1} mock_memory.assert_has_calls( - [call([sample_content]), call([sample_content]), call([sample_content]),] + [call([content]), call([content]), call([content]),] ) @@ -174,14 +178,15 @@ "Refuse to add content_metadata!" ) - sample_content = sample_data_model["content_no_data"][0] - pk = sample_content.sha1 + sample_content = sample_data_model["content"][0] + content = attr.evolve(sample_content, data=None) + pk = content.sha1 content_metadata = swh_storage.content_get_metadata([pk]) assert not content_metadata[pk] with pytest.raises(StorageArgumentException, match="Refuse to add"): - swh_storage.content_add_metadata([sample_content]) + swh_storage.content_add_metadata([content]) assert mock_memory.call_count == 1 diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -36,7 +36,6 @@ Person, Release, Revision, - SkippedContent, Snapshot, ) from swh.model.hypothesis_strategies import objects @@ -107,7 +106,8 @@ class LazyContent(Content): def with_data(self): - return Content.from_dict({**self.to_dict(), "data": data.cont["data"]}) + raw_data = data.content.data + return Content.from_dict({**self.to_dict(), "data": raw_data}) class TestStorage: @@ -470,14 +470,13 @@ ) def test_content_missing(self, swh_storage, algos): algos |= {"sha1"} - cont = Content.from_dict(data.cont2) - missing_cont = SkippedContent.from_dict(data.missing_cont) - swh_storage.content_add([cont]) + content, missing_content = [data.content2, data.missing_content] + swh_storage.content_add([content]) - test_contents = [cont.to_dict()] + test_contents = [content.to_dict()] missing_per_hash = defaultdict(list) for i in range(256): - test_content = missing_cont.to_dict() + test_content = missing_content.to_dict() for hash in algos: test_content[hash] = bytes([i]) + test_content[hash][1:] missing_per_hash[hash].append(test_content[hash]) @@ -501,14 +500,13 @@ ) def test_content_missing_unknown_algo(self, swh_storage, algos): algos |= {"sha1"} - cont = Content.from_dict(data.cont2) - missing_cont = SkippedContent.from_dict(data.missing_cont) - swh_storage.content_add([cont]) + content, missing_content = [data.content2, data.missing_content] + swh_storage.content_add([content]) - test_contents = [cont.to_dict()] + test_contents = [content.to_dict()] missing_per_hash = defaultdict(list) for i in range(16): - test_content = missing_cont.to_dict() + test_content = missing_content.to_dict() for hash in algos: test_content[hash] = bytes([i]) + test_content[hash][1:] missing_per_hash[hash].append(test_content[hash]) @@ -651,8 +649,7 @@ assert tuple(actual_contents[missing_cont.sha1]) == () def test_content_get_random(self, swh_storage, sample_data_model): - cont, cont2 = sample_data_model["content"][:2] - cont3 = sample_data_model["content_no_data"][0] + cont, cont2, cont3 = sample_data_model["content"][:3] swh_storage.content_add([cont, cont2, cont3]) assert swh_storage.content_get_random() in { @@ -2915,7 +2912,7 @@ def test_content_find_with_non_present_content( self, swh_storage, sample_data_model ): - missing_content = sample_data_model["content_no_data"][0] + missing_content = sample_data_model["skipped_content"][0] # 1. with something that does not exist actually_present = swh_storage.content_find({"sha1": missing_content.sha1})