diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py
--- a/swh/storage/pytest_plugin.py
+++ b/swh/storage/pytest_plugin.py
@@ -6,7 +6,7 @@
 import glob
 from os import path, environ
-from typing import Dict, Tuple, Union
+from typing import Union
 
 import pytest
 
@@ -16,9 +16,9 @@
 from pytest_postgresql.janitor import DatabaseJanitor, psycopg2, Version
 
 from swh.core.utils import numfile_sortkey as sortkey
-from swh.model.model import BaseModel
 from swh.storage import get_storage
-from swh.storage.tests.storage_data import data
+
+from swh.storage.tests.storage_data import data, StorageData
 
 SQL_DIR = path.join(path.dirname(swh.storage.__file__), "sql")
@@ -188,25 +188,12 @@
 
 
 @pytest.fixture
-def sample_data() -> Dict[str, Tuple[BaseModel]]:
+def sample_data() -> StorageData:
     """Pre-defined sample storage object data to manipulate
 
     Returns:
-        Dict of data model objects (keys: content, directory, revision, release, person,
-        origin)
+        StorageData whose attributes are data model objects: contents,
+        directories, revisions, releases, origins
 
     """
-    return {
-        "content": data.contents,
-        "skipped_content": data.skipped_contents,
-        "directory": data.directories,
-        "revision": data.revisions,
-        "release": data.releases,
-        "snapshot": data.snapshots,
-        "origin": data.origins,
-        "origin_visit": data.origin_visits,
-        "fetcher": data.fetchers,
-        "authority": data.authorities,
-        "origin_metadata": data.origin_metadata,
-        "content_metadata": data.content_metadata,
-    }
+    return data
diff --git a/swh/storage/tests/algos/test_origin.py b/swh/storage/tests/algos/test_origin.py
--- a/swh/storage/tests/algos/test_origin.py
+++ b/swh/storage/tests/algos/test_origin.py
@@ -13,7 +13,6 @@
 from swh.storage.utils import now
 
 from swh.storage.tests.test_storage import round_to_milliseconds
-from swh.storage.tests.storage_data import data
 
 
 def assert_list_eq(left, right, msg=None):
@@ -92,8 +91,8 @@
     assert origin_get_latest_visit_status(swh_storage, "unknown-origin") is None
 
     # unknown type so no result
-    origin = sample_data["origin"][0]
-    origin_visit = sample_data["origin_visit"][0]
+    origin = sample_data.origin
+    origin_visit = sample_data.origin_visit
     assert origin_visit.origin == origin.url
 
     swh_storage.origin_add([origin])
@@ -119,17 +118,21 @@
     """Initialize storage with origin/origin-visit/origin-visit-status
 
     """
-    snapshot = sample_data["snapshot"][2]
-    origin1, origin2 = sample_data["origin"][:2]
+    snapshot = sample_data.snapshots[2]
+    origin1, origin2 = sample_data.origins[:2]
     swh_storage.origin_add([origin1, origin2])
 
     ov1, ov2 = swh_storage.origin_visit_add(
         [
             OriginVisit(
-                origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
+                origin=origin1.url,
+                date=sample_data.date_visit1,
+                type=sample_data.type_visit1,
             ),
             OriginVisit(
-                origin=origin2.url, date=data.date_visit2, type=data.type_visit2,
+                origin=origin2.url,
+                date=sample_data.date_visit2,
+                type=sample_data.type_visit2,
             ),
         ]
     )
@@ -138,14 +141,14 @@
     date_now = now()
     date_now = round_to_milliseconds(date_now)
-    assert data.date_visit1 < data.date_visit2
-    assert data.date_visit2 < date_now
+    assert sample_data.date_visit1 < sample_data.date_visit2
+    assert sample_data.date_visit2 < date_now
 
     # origin visit status 1 for origin visit 1
     ovs11 = OriginVisitStatus(
         origin=origin1.url,
         visit=ov1.visit,
-        date=data.date_visit1,
+        date=sample_data.date_visit1,
         status="partial",
         snapshot=None,
     )
@@ -153,7 +156,7 @@
     ovs12 = OriginVisitStatus(
         origin=origin1.url,
         visit=ov1.visit,
-        date=data.date_visit2,
+        date=sample_data.date_visit2,
status="ongoing", snapshot=None, ) @@ -161,7 +164,7 @@ ovs21 = OriginVisitStatus( origin=origin2.url, visit=ov2.visit, - date=data.date_visit2, + date=sample_data.date_visit2, status="ongoing", snapshot=None, ) @@ -194,36 +197,40 @@ # no visit for origin1 url with type_visit2 assert ( - origin_get_latest_visit_status(swh_storage, origin1.url, type=data.type_visit2) + origin_get_latest_visit_status( + swh_storage, origin1.url, type=sample_data.type_visit2 + ) is None ) # no visit for origin2 url with type_visit1 assert ( - origin_get_latest_visit_status(swh_storage, origin2.url, type=data.type_visit1) + origin_get_latest_visit_status( + swh_storage, origin2.url, type=sample_data.type_visit1 + ) is None ) # Two visits, both with no snapshot, take the most recent actual_ov1, actual_ovs12 = origin_get_latest_visit_status( - swh_storage, origin1.url, type=data.type_visit1 + swh_storage, origin1.url, type=sample_data.type_visit1 ) assert isinstance(actual_ov1, OriginVisit) assert isinstance(actual_ovs12, OriginVisitStatus) assert actual_ov1.origin == ov1.origin assert actual_ov1.visit == ov1.visit - assert actual_ov1.type == data.type_visit1 + assert actual_ov1.type == sample_data.type_visit1 assert actual_ovs12 == ovs12 # take the most recent visit with type_visit2 actual_ov2, actual_ovs22 = origin_get_latest_visit_status( - swh_storage, origin2.url, type=data.type_visit2 + swh_storage, origin2.url, type=sample_data.type_visit2 ) assert isinstance(actual_ov2, OriginVisit) assert isinstance(actual_ovs22, OriginVisitStatus) assert actual_ov2.origin == ov2.origin assert actual_ov2.visit == ov2.visit - assert actual_ov2.type == data.type_visit2 + assert actual_ov2.type == sample_data.type_visit2 assert actual_ovs22 == ovs22 @@ -247,7 +254,7 @@ ) assert actual_ov1.origin == ov1.origin assert actual_ov1.visit == ov1.visit - assert actual_ov1.type == data.type_visit1 + assert actual_ov1.type == sample_data.type_visit1 assert actual_ovs11 == ovs11 # both status exist, take the latest one @@ -256,14 +263,14 @@ ) assert actual_ov1.origin == ov1.origin assert actual_ov1.visit == ov1.visit - assert actual_ov1.type == data.type_visit1 + assert actual_ov1.type == sample_data.type_visit1 assert actual_ovs12 == ovs12 assert isinstance(actual_ov1, OriginVisit) assert isinstance(actual_ovs12, OriginVisitStatus) assert actual_ov1.origin == ov1.origin assert actual_ov1.visit == ov1.visit - assert actual_ov1.type == data.type_visit1 + assert actual_ov1.type == sample_data.type_visit1 assert actual_ovs12 == ovs12 # take the most recent visit with type_visit2 @@ -272,7 +279,7 @@ ) assert actual_ov2.origin == ov2.origin assert actual_ov2.visit == ov2.visit - assert actual_ov2.type == data.type_visit2 + assert actual_ov2.type == sample_data.type_visit2 assert actual_ovs22 == ovs22 @@ -301,7 +308,7 @@ # Add another visit swh_storage.origin_visit_add( - [OriginVisit(origin=origin2.url, date=date_now, type=data.type_visit2,),] + [OriginVisit(origin=origin2.url, date=date_now, type=sample_data.type_visit2,),] ) # Requiring the latest visit with a snapshot, we still find the previous visit diff --git a/swh/storage/tests/algos/test_snapshot.py b/swh/storage/tests/algos/test_snapshot.py --- a/swh/storage/tests/algos/test_snapshot.py +++ b/swh/storage/tests/algos/test_snapshot.py @@ -53,9 +53,9 @@ assert snapshot_get_latest(swh_storage, "unknown-origin") is None # no snapshot on origin visit so None - origin = sample_data["origin"][0] + origin = sample_data.origin swh_storage.origin_add([origin]) - origin_visit, origin_visit2 
= sample_data["origin_visit"][:2] + origin_visit, origin_visit2 = sample_data.origin_visits[:2] assert origin_visit.origin == origin.url swh_storage.origin_visit_add([origin_visit]) @@ -67,7 +67,7 @@ # visit references a snapshot but the snapshot does not exist in backend for some # reason - complete_snapshot = sample_data["snapshot"][2] + complete_snapshot = sample_data.snapshots[2] swh_storage.origin_visit_status_add( [ OriginVisitStatus( @@ -85,10 +85,10 @@ def test_snapshot_get_latest(swh_storage, sample_data): - origin = sample_data["origin"][0] + origin = sample_data.origin swh_storage.origin_add([origin]) - visit1, visit2 = sample_data["origin_visit"][:2] + visit1, visit2 = sample_data.origin_visits[:2] assert visit1.origin == origin.url swh_storage.origin_visit_add([visit1]) @@ -96,7 +96,7 @@ visit_id = ov1["visit"] # Add snapshot to visit1, latest snapshot = visit 1 snapshot - complete_snapshot = sample_data["snapshot"][2] + complete_snapshot = sample_data.snapshots[2] swh_storage.snapshot_add([complete_snapshot]) swh_storage.origin_visit_status_add( diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -36,13 +36,15 @@ class StorageData: + """Interface to the content of this module + + """ + def __getattr__(self, key): try: v = globals()[key] except KeyError as e: raise AttributeError(e.args[0]) - if hasattr(v, "copy"): - return v.copy() return v diff --git a/swh/storage/tests/test_buffer.py b/swh/storage/tests/test_buffer.py --- a/swh/storage/tests/test_buffer.py +++ b/swh/storage/tests/test_buffer.py @@ -16,7 +16,7 @@ def test_buffering_proxy_storage_content_threshold_not_hit(sample_data): - contents = sample_data["content"][:2] + contents = sample_data.contents[:2] contents_dict = [c.to_dict() for c in contents] storage = get_storage_with_buffer_config(min_batch_size={"content": 10,}) @@ -38,7 +38,7 @@ def test_buffering_proxy_storage_content_threshold_nb_hit(sample_data): - content = sample_data["content"][0] + content = sample_data.content content_dict = content.to_dict() storage = get_storage_with_buffer_config(min_batch_size={"content": 1,}) @@ -57,7 +57,7 @@ def test_buffering_proxy_storage_content_deduplicate(sample_data): - contents = sample_data["content"][:2] + contents = sample_data.contents[:2] storage = get_storage_with_buffer_config(min_batch_size={"content": 2,}) s = storage.content_add([contents[0], contents[0]]) @@ -80,7 +80,7 @@ def test_buffering_proxy_storage_content_threshold_bytes_hit(sample_data): - contents = sample_data["content"][:2] + contents = sample_data.contents[:2] content_bytes_min_batch_size = 2 storage = get_storage_with_buffer_config( min_batch_size={"content": 10, "content_bytes": content_bytes_min_batch_size,} @@ -102,7 +102,7 @@ def test_buffering_proxy_storage_skipped_content_threshold_not_hit(sample_data): - contents = sample_data["skipped_content"] + contents = sample_data.skipped_contents contents_dict = [c.to_dict() for c in contents] storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 10,}) s = storage.skipped_content_add([contents[0], contents[1]]) @@ -120,7 +120,7 @@ def test_buffering_proxy_storage_skipped_content_threshold_nb_hit(sample_data): - contents = sample_data["skipped_content"] + contents = sample_data.skipped_contents storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 1,}) s = storage.skipped_content_add([contents[0]]) @@ -134,7 +134,7 @@ def 
test_buffering_proxy_storage_skipped_content_deduplicate(sample_data): - contents = sample_data["skipped_content"][:2] + contents = sample_data.skipped_contents[:2] storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 2,}) s = storage.skipped_content_add([contents[0], contents[0]]) @@ -156,33 +156,32 @@ def test_buffering_proxy_storage_directory_threshold_not_hit(sample_data): - directories = sample_data["directory"] + directory = sample_data.directory storage = get_storage_with_buffer_config(min_batch_size={"directory": 10,}) - s = storage.directory_add([directories[0]]) + s = storage.directory_add([directory]) assert s == {} - directory_id = directories[0].id - missing_directories = storage.directory_missing([directory_id]) - assert list(missing_directories) == [directory_id] + missing_directories = storage.directory_missing([directory.id]) + assert list(missing_directories) == [directory.id] s = storage.flush() assert s == { "directory:add": 1, } - missing_directories = storage.directory_missing([directory_id]) + missing_directories = storage.directory_missing([directory.id]) assert list(missing_directories) == [] def test_buffering_proxy_storage_directory_threshold_hit(sample_data): - directories = sample_data["directory"] + directory = sample_data.directory storage = get_storage_with_buffer_config(min_batch_size={"directory": 1,}) - s = storage.directory_add([directories[0]]) + s = storage.directory_add([directory]) assert s == { "directory:add": 1, } - missing_directories = storage.directory_missing([directories[0].id]) + missing_directories = storage.directory_missing([directory.id]) assert list(missing_directories) == [] s = storage.flush() @@ -190,7 +189,7 @@ def test_buffering_proxy_storage_directory_deduplicate(sample_data): - directories = sample_data["directory"][:2] + directories = sample_data.directories[:2] storage = get_storage_with_buffer_config(min_batch_size={"directory": 2,}) s = storage.directory_add([directories[0], directories[0]]) @@ -212,33 +211,32 @@ def test_buffering_proxy_storage_revision_threshold_not_hit(sample_data): - revisions = sample_data["revision"] + revision = sample_data.revision storage = get_storage_with_buffer_config(min_batch_size={"revision": 10,}) - s = storage.revision_add([revisions[0]]) + s = storage.revision_add([revision]) assert s == {} - revision_id = revisions[0].id - missing_revisions = storage.revision_missing([revision_id]) - assert list(missing_revisions) == [revision_id] + missing_revisions = storage.revision_missing([revision.id]) + assert list(missing_revisions) == [revision.id] s = storage.flush() assert s == { "revision:add": 1, } - missing_revisions = storage.revision_missing([revision_id]) + missing_revisions = storage.revision_missing([revision.id]) assert list(missing_revisions) == [] def test_buffering_proxy_storage_revision_threshold_hit(sample_data): - revisions = sample_data["revision"] + revision = sample_data.revision storage = get_storage_with_buffer_config(min_batch_size={"revision": 1,}) - s = storage.revision_add([revisions[0]]) + s = storage.revision_add([revision]) assert s == { "revision:add": 1, } - missing_revisions = storage.revision_missing([revisions[0].id]) + missing_revisions = storage.revision_missing([revision.id]) assert list(missing_revisions) == [] s = storage.flush() @@ -246,7 +244,7 @@ def test_buffering_proxy_storage_revision_deduplicate(sample_data): - revisions = sample_data["revision"][:2] + revisions = sample_data.revisions[:2] storage = 
get_storage_with_buffer_config(min_batch_size={"revision": 2,}) s = storage.revision_add([revisions[0], revisions[0]]) @@ -268,7 +266,7 @@ def test_buffering_proxy_storage_release_threshold_not_hit(sample_data): - releases = sample_data["release"] + releases = sample_data.releases threshold = 10 assert len(releases) < threshold @@ -292,7 +290,7 @@ def test_buffering_proxy_storage_release_threshold_hit(sample_data): - releases = sample_data["release"] + releases = sample_data.releases threshold = 2 assert len(releases) > threshold @@ -314,7 +312,7 @@ def test_buffering_proxy_storage_release_deduplicate(sample_data): - releases = sample_data["release"][:2] + releases = sample_data.releases[:2] storage = get_storage_with_buffer_config(min_batch_size={"release": 2,}) s = storage.release_add([releases[0], releases[0]]) @@ -340,15 +338,15 @@ """ threshold = 10 - contents = sample_data["content"] + contents = sample_data.contents assert 0 < len(contents) < threshold - skipped_contents = sample_data["skipped_content"] + skipped_contents = sample_data.skipped_contents assert 0 < len(skipped_contents) < threshold - directories = sample_data["directory"] + directories = sample_data.directories assert 0 < len(directories) < threshold - revisions = sample_data["revision"] + revisions = sample_data.revisions assert 0 < len(revisions) < threshold - releases = sample_data["release"] + releases = sample_data.releases assert 0 < len(releases) < threshold storage = get_storage_with_buffer_config( diff --git a/swh/storage/tests/test_cassandra.py b/swh/storage/tests/test_cassandra.py --- a/swh/storage/tests/test_cassandra.py +++ b/swh/storage/tests/test_cassandra.py @@ -193,7 +193,7 @@ """ called = 0 - cont, cont2 = sample_data["content"][:2] + cont, cont2 = sample_data.contents[:2] # always return a token def mock_cgtfsh(algo, hash_): @@ -236,7 +236,7 @@ """ called = 0 - cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data["content"][:2]] + cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data.contents[:2]] # always return a token def mock_cgtfsh(algo, hash_): @@ -284,7 +284,7 @@ """ called = 0 - cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data["content"][:2]] + cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data.contents[:2]] # always return a token def mock_cgtfsh(algo, hash_): diff --git a/swh/storage/tests/test_filter.py b/swh/storage/tests/test_filter.py --- a/swh/storage/tests/test_filter.py +++ b/swh/storage/tests/test_filter.py @@ -21,7 +21,7 @@ def test_filtering_proxy_storage_content(swh_storage, sample_data): - sample_content = sample_data["content"][0] + sample_content = sample_data.content content = next(swh_storage.content_get([sample_content.sha1])) assert not content @@ -43,7 +43,7 @@ def test_filtering_proxy_storage_skipped_content(swh_storage, sample_data): - sample_content = sample_data["skipped_content"][0] + sample_content = sample_data.skipped_content sample_content_dict = sample_content.to_dict() content = next(swh_storage.skipped_content_missing([sample_content_dict])) @@ -67,7 +67,7 @@ swh_storage, sample_data ): sample_contents = [ - attr.evolve(c, sha1_git=None) for c in sample_data["skipped_content"] + attr.evolve(c, sha1_git=None) for c in sample_data.skipped_contents ] sample_content, sample_content2 = [c.to_dict() for c in sample_contents[:2]] @@ -92,7 +92,7 @@ def test_filtering_proxy_storage_revision(swh_storage, sample_data): - sample_revision = sample_data["revision"][0] + sample_revision = sample_data.revision revision = 
next(swh_storage.revision_get([sample_revision.id])) assert not revision @@ -112,7 +112,7 @@ def test_filtering_proxy_storage_directory(swh_storage, sample_data): - sample_directory = sample_data["directory"][0] + sample_directory = sample_data.directory directory = next(swh_storage.directory_missing([sample_directory.id])) assert directory diff --git a/swh/storage/tests/test_pytest_plugin.py b/swh/storage/tests/test_pytest_plugin.py --- a/swh/storage/tests/test_pytest_plugin.py +++ b/swh/storage/tests/test_pytest_plugin.py @@ -3,30 +3,30 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information - from swh.model.model import BaseModel from swh.storage.interface import StorageInterface def test_sample_data(sample_data): - assert set(sample_data.keys()) == set( - [ - "content", - "skipped_content", - "directory", - "revision", - "release", - "snapshot", - "origin", - "origin_visit", - "fetcher", - "authority", - "origin_metadata", - "content_metadata", - ] - ) - for object_type, objs in sample_data.items(): - for obj in objs: + """Ensure the most important part of the sample data fixtures (about dag model + objects) are BaseModel instances. + + """ + for attribute_key in [ + "contents", + "skipped_contents", + "directories", + "revisions", + "releases", + "snapshots", + "origins", + "origin_visits", + "fetchers", + "authorities", + "origin_metadata", + "content_metadata", + ]: + for obj in getattr(sample_data, attribute_key): assert isinstance(obj, BaseModel) diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -51,8 +51,7 @@ """Standard content_add works as before """ - sample_content = sample_data["content"][0] - + sample_content = sample_data.content content = next(swh_storage.content_get([sample_content.sha1])) assert not content @@ -82,7 +81,7 @@ {"content:add": 1}, ] - sample_content = sample_data["content"][0] + sample_content = sample_data.content content = next(swh_storage.content_get([sample_content.sha1])) assert not content @@ -104,7 +103,7 @@ mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.content_add") mock_memory.side_effect = StorageArgumentException("Refuse to add content always!") - sample_content = sample_data["content"][0] + sample_content = sample_data.content content = next(swh_storage.content_get([sample_content.sha1])) assert not content @@ -119,7 +118,7 @@ """Standard content_add_metadata works as before """ - sample_content = sample_data["content"][0] + sample_content = sample_data.content content = attr.evolve(sample_content, data=None) pk = content.sha1 @@ -154,7 +153,7 @@ {"content:add": 1}, ] - sample_content = sample_data["content"][0] + sample_content = sample_data.content content = attr.evolve(sample_content, data=None) s = swh_storage.content_add_metadata([content]) @@ -178,7 +177,7 @@ "Refuse to add content_metadata!" 
     )
 
-    sample_content = sample_data["content"][0]
+    sample_content = sample_data.content
     content = attr.evolve(sample_content, data=None)
 
     pk = content.sha1
@@ -195,7 +194,7 @@
     """Standard skipped_content_add works as before
 
     """
-    sample_content = sample_data["skipped_content"][0]
+    sample_content = sample_data.skipped_content
     sample_content_dict = sample_content.to_dict()
 
     skipped_contents = list(swh_storage.skipped_content_missing([sample_content_dict]))
@@ -227,7 +226,7 @@
         {"skipped_content:add": 1},
     ]
 
-    sample_content = sample_data["skipped_content"][0]
+    sample_content = sample_data.skipped_content
 
     s = swh_storage.skipped_content_add([sample_content])
     assert s == {"skipped_content:add": 1}
@@ -250,7 +249,7 @@
         "Refuse to add content_metadata!"
     )
 
-    sample_content = sample_data["skipped_content"][0]
+    sample_content = sample_data.skipped_content
     sample_content_dict = sample_content.to_dict()
 
     skipped_contents = list(swh_storage.skipped_content_missing([sample_content_dict]))
@@ -269,7 +268,7 @@
     """Standard origin_visit_add works as before
 
     """
-    origin = sample_data["origin"][0]
+    origin = sample_data.origin
 
     swh_storage.origin_add([origin])
@@ -292,7 +291,7 @@
     """Multiple retries for hash collision and psycopg2 error but finally ok
 
     """
-    origin = sample_data["origin"][1]
+    origin = sample_data.origins[1]
     swh_storage.origin_add([origin])
 
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.origin_visit_add")
@@ -326,7 +325,7 @@
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.origin_visit_add")
     mock_memory.side_effect = StorageArgumentException("Refuse to add origin always!")
 
-    origin = sample_data["origin"][0]
+    origin = sample_data.origin
 
     origins = list(swh_storage.origin_visit_get(origin.url))
     assert not origins
@@ -344,7 +343,7 @@
     """Standard metadata_fetcher_add works as before
 
     """
-    fetcher = sample_data["fetcher"][0]
+    fetcher = sample_data.metadata_fetcher
 
     metadata_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
     assert not metadata_fetcher
@@ -361,7 +360,7 @@
     """Multiple retries for hash collision and psycopg2 error but finally ok
 
     """
-    fetcher = sample_data["fetcher"][0]
+    fetcher = sample_data.metadata_fetcher
 
     mock_memory = mocker.patch(
         "swh.storage.in_memory.InMemoryStorage.metadata_fetcher_add"
     )
@@ -397,7 +396,7 @@
         "Refuse to add metadata_fetcher always!"
     )
 
-    fetcher = sample_data["fetcher"][0]
+    fetcher = sample_data.metadata_fetcher
 
     actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
     assert not actual_fetcher
@@ -412,7 +411,7 @@
     """Standard metadata_authority_add works as before
 
     """
-    authority = sample_data["authority"][0]
+    authority = sample_data.metadata_authority
 
     assert not swh_storage.metadata_authority_get(authority.type, authority.url)
@@ -428,7 +427,7 @@
     """Multiple retries for hash collision and psycopg2 error but finally ok
 
     """
-    authority = sample_data["authority"][0]
+    authority = sample_data.metadata_authority
 
     mock_memory = mocker.patch(
         "swh.storage.in_memory.InMemoryStorage.metadata_authority_add"
@@ -464,7 +463,7 @@
         "Refuse to add authority_id always!"
     )
 
-    authority = sample_data["authority"][0]
+    authority = sample_data.metadata_authority
 
     swh_storage.metadata_authority_get(authority.type, authority.url)
@@ -478,12 +477,12 @@
     """Standard object_metadata_add works as before
 
     """
-    origin = sample_data["origin"][0]
-    ori_meta = sample_data["origin_metadata"][0]
+    origin = sample_data.origin
+    ori_meta = sample_data.origin_metadata1
     assert origin.url == ori_meta.id
     swh_storage.origin_add([origin])
-    swh_storage.metadata_authority_add([sample_data["authority"][0]])
-    swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]])
+    swh_storage.metadata_authority_add([sample_data.metadata_authority])
+    swh_storage.metadata_fetcher_add([sample_data.metadata_fetcher])
 
     origin_metadata = swh_storage.object_metadata_get(
         MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority
     )
@@ -505,12 +504,12 @@
     """Multiple retries for hash collision and psycopg2 error but finally ok
 
     """
-    origin = sample_data["origin"][0]
-    ori_meta = sample_data["origin_metadata"][0]
+    origin = sample_data.origin
+    ori_meta = sample_data.origin_metadata1
     assert origin.url == ori_meta.id
     swh_storage.origin_add([origin])
-    swh_storage.metadata_authority_add([sample_data["authority"][0]])
-    swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]])
+    swh_storage.metadata_authority_add([sample_data.metadata_authority])
+    swh_storage.metadata_fetcher_add([sample_data.metadata_fetcher])
 
     mock_memory = mocker.patch(
         "swh.storage.in_memory.InMemoryStorage.object_metadata_add"
     )
@@ -547,8 +546,8 @@
     )
     mock_memory.side_effect = StorageArgumentException("Refuse to add always!")
 
-    origin = sample_data["origin"][0]
-    ori_meta = sample_data["origin_metadata"][0]
+    origin = sample_data.origin
+    ori_meta = sample_data.origin_metadata1
     assert origin.url == ori_meta.id
     swh_storage.origin_add([origin])
@@ -562,7 +561,7 @@
     """Standard directory_add works as before
 
     """
-    sample_dir = sample_data["directory"][0]
+    sample_dir = sample_data.directory
 
     directory = swh_storage.directory_get_random()  # no directory
     assert not directory
@@ -592,7 +591,7 @@
         {"directory:add": 1},
     ]
 
-    sample_dir = sample_data["directory"][1]
+    sample_dir = sample_data.directories[1]
 
     directory_id = swh_storage.directory_get_random()  # no directory
     assert not directory_id
@@ -618,7 +617,7 @@
         "Refuse to add directory always!"
     )
 
-    sample_dir = sample_data["directory"][0]
+    sample_dir = sample_data.directory
 
     directory_id = swh_storage.directory_get_random()  # no directory
     assert not directory_id
@@ -633,7 +632,7 @@
     """Standard revision_add works as before
 
     """
-    sample_rev = sample_data["revision"][0]
+    sample_rev = sample_data.revision
 
     revision = next(swh_storage.revision_get([sample_rev.id]))
     assert not revision
@@ -663,7 +662,7 @@
         {"revision:add": 1},
     ]
 
-    sample_rev = sample_data["revision"][0]
+    sample_rev = sample_data.revision
 
     revision = next(swh_storage.revision_get([sample_rev.id]))
     assert not revision
@@ -687,7 +686,7 @@
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.revision_add")
     mock_memory.side_effect = StorageArgumentException("Refuse to add revision always!")
 
-    sample_rev = sample_data["revision"][0]
+    sample_rev = sample_data.revision
 
     revision = next(swh_storage.revision_get([sample_rev.id]))
     assert not revision
@@ -702,7 +701,7 @@
     """Standard release_add works as before
 
     """
-    sample_rel = sample_data["release"][0]
+    sample_rel = sample_data.release
 
     release = next(swh_storage.release_get([sample_rel.id]))
     assert not release
@@ -732,7 +731,7 @@
         {"release:add": 1},
     ]
 
-    sample_rel = sample_data["release"][0]
+    sample_rel = sample_data.release
 
     release = next(swh_storage.release_get([sample_rel.id]))
     assert not release
@@ -756,7 +755,7 @@
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.release_add")
     mock_memory.side_effect = StorageArgumentException("Refuse to add release always!")
 
-    sample_rel = sample_data["release"][0]
+    sample_rel = sample_data.release
 
     release = next(swh_storage.release_get([sample_rel.id]))
     assert not release
@@ -771,7 +770,7 @@
     """Standard snapshot_add works as before
 
     """
-    sample_snap = sample_data["snapshot"][0]
+    sample_snap = sample_data.snapshot
 
     snapshot = swh_storage.snapshot_get(sample_snap.id)
     assert not snapshot
@@ -801,7 +800,7 @@
         {"snapshot:add": 1},
     ]
 
-    sample_snap = sample_data["snapshot"][0]
+    sample_snap = sample_data.snapshot
 
     snapshot = swh_storage.snapshot_get(sample_snap.id)
     assert not snapshot
@@ -825,7 +824,7 @@
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.snapshot_add")
     mock_memory.side_effect = StorageArgumentException("Refuse to add snapshot always!")
 
-    sample_snap = sample_data["snapshot"][0]
+    sample_snap = sample_data.snapshot
 
     snapshot = swh_storage.snapshot_get(sample_snap.id)
     assert not snapshot
diff --git a/swh/storage/tests/test_revision_bw_compat.py b/swh/storage/tests/test_revision_bw_compat.py
--- a/swh/storage/tests/test_revision_bw_compat.py
+++ b/swh/storage/tests/test_revision_bw_compat.py
@@ -17,7 +17,7 @@
 def test_revision_extra_header_in_metadata(swh_storage_backend_config, sample_data):
     storage = get_storage(**swh_storage_backend_config)
-    rev = sample_data["revision"][0]
+    rev = sample_data.revision
 
     md_w_extra = dict(
         rev.metadata.items(),
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -159,7 +159,7 @@
         assert swh_storage.check_config(check_write=False)
 
     def test_content_add(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content
 
         insertion_start_time = now()
         actual_result = swh_storage.content_add([cont])
@@ -191,7 +191,7 @@
         assert swh_storage.stat_counters()["content"] == 1
 
     def test_content_add_from_generator(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content
 
         def _cnt_gen():
             yield cont
@@ -207,7 +207,7 @@
         assert swh_storage.stat_counters()["content"] == 1
 
     def test_content_add_from_lazy_content(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content
         lazy_content = LazyContent.from_dict(cont.to_dict())
 
         insertion_start_time = now()
@@ -243,7 +243,7 @@
         assert swh_storage.stat_counters()["content"] == 1
 
     def test_content_get_missing(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
+        cont, cont2 = sample_data.contents[:2]
 
         swh_storage.content_add([cont])
@@ -261,7 +261,7 @@
         assert results == [None, {"sha1": cont.sha1, "data": cont.data}]
 
     def test_content_add_different_input(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
+        cont, cont2 = sample_data.contents[:2]
 
         actual_result = swh_storage.content_add([cont, cont2])
         assert actual_result == {
@@ -270,7 +270,7 @@
         }
 
     def test_content_add_twice(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
+        cont, cont2 = sample_data.contents[:2]
 
         actual_result = swh_storage.content_add([cont])
         assert actual_result == {
@@ -290,7 +290,7 @@
         assert len(swh_storage.content_find(cont2.to_dict())) == 1
 
     def test_content_add_collision(self, swh_storage, sample_data):
-        cont1 = sample_data["content"][0]
+        cont1 = sample_data.content
 
         # create (corrupted) content with same sha1{,_git} but != sha256
         sha256_array = bytearray(cont1.sha256)
@@ -317,7 +317,7 @@
         ]
 
     def test_content_add_duplicate(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content
         swh_storage.content_add([cont, cont])
 
         assert list(swh_storage.content_get([cont.sha1])) == [
@@ -325,7 +325,7 @@
         ]
 
     def test_content_update(self, swh_storage, sample_data):
-        cont1 = sample_data["content"][0]
+        cont1 = sample_data.content
 
         if hasattr(swh_storage, "journal_writer"):
             swh_storage.journal_writer.journal = None  # TODO, not supported
@@ -346,7 +346,7 @@
         assert tuple(results[cont1.sha1]) == (expected_content,)
 
     def test_content_add_metadata(self, swh_storage, sample_data):
-        cont = attr.evolve(sample_data["content"][0], data=None, ctime=now())
+        cont = attr.evolve(sample_data.content, data=None, ctime=now())
 
         actual_result = swh_storage.content_add_metadata([cont])
         assert actual_result == {
@@ -370,7 +370,7 @@
             assert obj == cont
 
     def test_content_add_metadata_different_input(self, swh_storage, sample_data):
-        contents = sample_data["content"][:2]
+        contents = sample_data.contents[:2]
         cont = attr.evolve(contents[0], data=None, ctime=now())
         cont2 = attr.evolve(contents[1], data=None, ctime=now())
 
@@ -380,7 +380,7 @@
         }
 
     def test_content_add_metadata_collision(self, swh_storage, sample_data):
-        cont1 = attr.evolve(sample_data["content"][0], data=None, ctime=now())
+        cont1 = attr.evolve(sample_data.content, data=None, ctime=now())
 
         # create (corrupted) content with same sha1{,_git} but != sha256
         sha1_git_array = bytearray(cont1.sha256)
@@ -407,7 +407,7 @@
         ]
 
     def test_skipped_content_add(self, swh_storage, sample_data):
-        contents = sample_data["skipped_content"][:2]
+        contents = sample_data.skipped_contents[:2]
         cont = contents[0]
         cont2 = attr.evolve(contents[1], blake2s256=None)
 
@@ -427,7 +427,7 @@
 
     def test_skipped_content_add_missing_hashes(self, swh_storage, sample_data):
         cont, cont2 = [
-            attr.evolve(c, sha1_git=None) for c in sample_data["skipped_content"][:2]
+            attr.evolve(c, sha1_git=None) for c in sample_data.skipped_contents[:2]
         ]
         contents_dict = [c.to_dict() for c in [cont, cont2]]
 
@@ -443,7 +443,7 @@
         assert missing == []
 
     def test_skipped_content_missing_partial_hash(self, swh_storage, sample_data):
-        cont = sample_data["skipped_content"][0]
+        cont = sample_data.skipped_content
         cont2 = attr.evolve(cont, sha1_git=None)
         contents_dict = [c.to_dict() for c in [cont, cont2]]
 
@@ -522,8 +522,8 @@
     def test_content_missing_per_sha1(self, swh_storage, sample_data):
         # given
-        cont = sample_data["content"][0]
-        missing_cont = sample_data["skipped_content"][0]
+        cont = sample_data.content
+        missing_cont = sample_data.skipped_content
         swh_storage.content_add([cont])
 
         # when
@@ -532,8 +532,8 @@
         assert list(gen) == [missing_cont.sha1]
 
     def test_content_missing_per_sha1_git(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
-        missing_cont = sample_data["skipped_content"][0]
+        cont, cont2 = sample_data.contents[:2]
+        missing_cont = sample_data.skipped_content
 
         swh_storage.content_add([cont, cont2])
@@ -618,7 +618,7 @@
         assert_contents_ok(expected_contents, actual_contents, ["sha1"])
 
     def test_content_get_metadata(self, swh_storage, sample_data):
-        cont1, cont2 = sample_data["content"][:2]
+        cont1, cont2 = sample_data.contents[:2]
 
         swh_storage.content_add([cont1, cont2])
@@ -636,8 +636,8 @@
         assert len(actual_md.keys()) == 2
 
     def test_content_get_metadata_missing_sha1(self, swh_storage, sample_data):
-        cont1, cont2 = sample_data["content"][:2]
-        missing_cont = sample_data["skipped_content"][0]
+        cont1, cont2 = sample_data.contents[:2]
+        missing_cont = sample_data.skipped_content
 
         swh_storage.content_add([cont1, cont2])
@@ -647,7 +647,7 @@
         assert tuple(actual_contents[missing_cont.sha1]) == ()
 
     def test_content_get_random(self, swh_storage, sample_data):
-        cont, cont2, cont3 = sample_data["content"][:3]
+        cont, cont2, cont3 = sample_data.contents[:3]
         swh_storage.content_add([cont, cont2, cont3])
 
         assert swh_storage.content_get_random() in {
@@ -657,7 +657,7 @@
         }
 
     def test_directory_add(self, swh_storage, sample_data):
-        directory = sample_data["directory"][1]
+        directory = sample_data.directories[1]
 
         init_missing = list(swh_storage.directory_missing([directory.id]))
         assert [directory.id] == init_missing
@@ -681,7 +681,7 @@
         assert swh_storage.stat_counters()["directory"] == 1
 
     def test_directory_add_from_generator(self, swh_storage, sample_data):
-        directory = sample_data["directory"][1]
+        directory = sample_data.directories[1]
 
         def _dir_gen():
             yield directory
@@ -697,7 +697,7 @@
         assert swh_storage.stat_counters()["directory"] == 1
 
     def test_directory_add_twice(self, swh_storage, sample_data):
-        directory = sample_data["directory"][1]
+        directory = sample_data.directories[1]
 
         actual_result = swh_storage.directory_add([directory])
         assert actual_result == {"directory:add": 1}
@@ -714,7 +714,7 @@
         ]
 
     def test_directory_get_recursive(self, swh_storage, sample_data):
-        dir1, dir2, dir3 = sample_data["directory"][:3]
+        dir1, dir2, dir3 = sample_data.directories[:3]
 
         init_missing = list(swh_storage.directory_missing([dir1.id]))
         assert init_missing == [dir1.id]
@@ -749,7 +749,7 @@
         assert sorted(expected_data, key=cmpdir) == sorted(actual_data, key=cmpdir)
 
     def test_directory_get_non_recursive(self, swh_storage, sample_data):
-        dir1, dir2, dir3 = sample_data["directory"][:3]
+        dir1, dir2, dir3 = sample_data.directories[:3]
 
         init_missing = list(swh_storage.directory_missing([dir1.id]))
         assert init_missing == [dir1.id]
@@ -780,8 +780,8 @@
         assert sorted(expected_data, key=cmpdir) == sorted(actual_data, key=cmpdir)
 
     def test_directory_entry_get_by_path(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
-        dir1, dir2, dir3, dir4, dir5 = sample_data["directory"][:5]
+        cont = sample_data.content
+        dir1, dir2, dir3, dir4, dir5 = sample_data.directories[:5]
 
         # given
         init_missing = list(swh_storage.directory_missing([dir3.id]))
@@ -853,7 +853,7 @@
         assert actual_entry is None
 
     def test_directory_get_random(self, swh_storage, sample_data):
-        dir1, dir2, dir3 = sample_data["directory"][:3]
+        dir1, dir2, dir3 = sample_data.directories[:3]
         swh_storage.directory_add([dir1, dir2, dir3])
 
         assert swh_storage.directory_get_random() in {
@@ -863,7 +863,7 @@
         }
 
     def test_revision_add(self, swh_storage, sample_data):
-        revision = sample_data["revision"][0]
+        revision = sample_data.revision
         init_missing = swh_storage.revision_missing([revision.id])
         assert list(init_missing) == [revision.id]
@@ -885,7 +885,7 @@
         assert swh_storage.stat_counters()["revision"] == 1
 
     def test_revision_add_from_generator(self, swh_storage, sample_data):
-        revision = sample_data["revision"][0]
+        revision = sample_data.revision
 
         def _rev_gen():
             yield revision
@@ -897,7 +897,7 @@
         assert swh_storage.stat_counters()["revision"] == 1
 
     def test_revision_add_twice(self, swh_storage, sample_data):
-        revision, revision2 = sample_data["revision"][:2]
+        revision, revision2 = sample_data.revisions[:2]
 
         actual_result = swh_storage.revision_add([revision])
         assert actual_result == {"revision:add": 1}
@@ -915,7 +915,7 @@
         ]
 
     def test_revision_add_name_clash(self, swh_storage, sample_data):
-        revision, revision2 = sample_data["revision"][:2]
+        revision, revision2 = sample_data.revisions[:2]
 
         revision1 = attr.evolve(
             revision,
@@ -937,7 +937,7 @@
         assert actual_result == {"revision:add": 2}
 
     def test_revision_get_order(self, swh_storage, sample_data):
-        revision, revision2 = sample_data["revision"][:2]
+        revision, revision2 = sample_data.revisions[:2]
 
         add_result = swh_storage.revision_add([revision, revision2])
         assert add_result == {"revision:add": 2}
@@ -952,7 +952,7 @@
         assert [Revision.from_dict(r) for r in res2] == [revision2, revision]
 
     def test_revision_log(self, swh_storage, sample_data):
-        revision1, revision2, revision3, revision4 = sample_data["revision"][:4]
+        revision1, revision2, revision3, revision4 = sample_data.revisions[:4]
 
         # rev4 -is-child-of-> rev3 -> rev1, (rev2 -> rev1)
         swh_storage.revision_add([revision1, revision2, revision3, revision4])
@@ -966,7 +966,7 @@
         assert actual_results == [revision4, revision3, revision1, revision2]
 
     def test_revision_log_with_limit(self, swh_storage, sample_data):
-        revision1, revision2, revision3, revision4 = sample_data["revision"][:4]
+        revision1, revision2, revision3, revision4 = sample_data.revisions[:4]
 
         # revision4 -is-child-of-> revision3
         swh_storage.revision_add([revision3, revision4])
@@ -977,12 +977,12 @@
         assert actual_results[0] == revision4
 
     def test_revision_log_unknown_revision(self, swh_storage, sample_data):
-        revision = sample_data["revision"][0]
+        revision = sample_data.revision
         rev_log = list(swh_storage.revision_log([revision.id]))
         assert rev_log == []
 
     def test_revision_shortlog(self, swh_storage, sample_data):
-        revision1, revision2, revision3, revision4 = sample_data["revision"][:4]
+        revision1, revision2, revision3, revision4 = sample_data.revisions[:4]
 
         # rev4 -is-child-of-> rev3 -> (rev1, rev2); rev2 -> rev1
         swh_storage.revision_add([revision1, revision2, revision3, revision4])
@@ -999,7 +999,7 @@
         ]
 
     def test_revision_shortlog_with_limit(self, swh_storage, sample_data):
-        revision1, revision2, revision3, revision4 = sample_data["revision"][:4]
+        revision1, revision2, revision3, revision4 = sample_data.revisions[:4]
 
         # revision4 -is-child-of-> revision3
         swh_storage.revision_add([revision1, revision2, revision3, revision4])
@@ -1010,7 +1010,7 @@
         assert list(actual_results[0]) == [revision4.id, revision4.parents]
 
     def test_revision_get(self, swh_storage, sample_data):
-        revision, revision2 = sample_data["revision"][:2]
+        revision, revision2 = sample_data.revisions[:2]
 
         swh_storage.revision_add([revision])
@@ -1021,7 +1021,7 @@
         assert actual_revisions[1] is None
 
     def test_revision_get_no_parents(self, swh_storage, sample_data):
-        revision = sample_data["revision"][0]
+        revision = sample_data.revision
         swh_storage.revision_add([revision])
 
         get = list(swh_storage.revision_get([revision.id]))
@@ -1031,7 +1031,7 @@
         assert tuple(get[0]["parents"]) == ()  # no parents on this one
 
     def test_revision_get_random(self, swh_storage, sample_data):
-        revision1, revision2, revision3 = sample_data["revision"][:3]
+        revision1, revision2, revision3 = sample_data.revisions[:3]
 
         swh_storage.revision_add([revision1, revision2, revision3])
@@ -1042,7 +1042,7 @@
         }
 
     def test_release_add(self, swh_storage, sample_data):
-        release, release2 = sample_data["release"][:2]
+        release, release2 = sample_data.releases[:2]
 
         init_missing = swh_storage.release_missing([release.id, release2.id])
         assert list(init_missing) == [release.id, release2.id]
@@ -1066,7 +1066,7 @@
         assert swh_storage.stat_counters()["release"] == 2
 
     def test_release_add_from_generator(self, swh_storage, sample_data):
-        release, release2 = sample_data["release"][:2]
+        release, release2 = sample_data.releases[:2]
 
         def _rel_gen():
             yield release
@@ -1084,7 +1084,7 @@
         assert swh_storage.stat_counters()["release"] == 2
 
     def test_release_add_no_author_date(self, swh_storage, sample_data):
-        full_release = sample_data["release"][0]
+        full_release = sample_data.release
 
         release = attr.evolve(full_release, author=None, date=None)
         actual_result = swh_storage.release_add([release])
@@ -1098,7 +1098,7 @@
         ]
 
     def test_release_add_twice(self, swh_storage, sample_data):
-        release, release2 = sample_data["release"][:2]
+        release, release2 = sample_data.releases[:2]
 
         actual_result = swh_storage.release_add([release])
         assert actual_result == {"release:add": 1}
@@ -1124,14 +1124,14 @@
                     email=b"john.doe@example.com",
                 ),
             )
-            for c in sample_data["release"][:2]
+            for c in sample_data.releases[:2]
         ]
 
         actual_result = swh_storage.release_add([release, release2])
         assert actual_result == {"release:add": 2}
 
     def test_release_get(self, swh_storage, sample_data):
-        release, release2, release3 = sample_data["release"][:3]
+        release, release2, release3 = sample_data.releases[:3]
 
         # given
         swh_storage.release_add([release, release2])
@@ -1147,7 +1147,7 @@
         assert unknown_releases[0] is None
 
     def test_release_get_order(self, swh_storage, sample_data):
-        release, release2 = sample_data["release"][:2]
+        release, release2 = sample_data.releases[:2]
 
         add_result = swh_storage.release_add([release, release2])
         assert add_result == {"release:add": 2}
@@ -1161,7 +1161,7 @@
         assert list(res2) == [release2.to_dict(), release.to_dict()]
 
     def test_release_get_random(self, swh_storage, sample_data):
-        release, release2, release3 = sample_data["release"][:3]
+        release, release2, release3 = sample_data.releases[:3]
 
         swh_storage.release_add([release, release2, release3])
@@ -1172,7 +1172,7 @@
         }
 
     def test_origin_add(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]]
 
         assert swh_storage.origin_get([origin_dict])[0] is None
@@ -1194,7 +1194,7 @@
         assert swh_storage.stat_counters()["origin"] == 2
 
     def test_origin_add_from_generator(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]]
 
         def _ori_gen():
@@ -1218,7 +1218,7 @@
         assert swh_storage.stat_counters()["origin"] == 2
 
     def test_origin_add_twice(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]]
 
         add1 = swh_storage.origin_add([origin, origin2])
@@ -1234,7 +1234,7 @@
         assert add2 == {"origin:add": 0}
 
     def test_origin_get_legacy(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]]
 
         assert swh_storage.origin_get(origin_dict) is None
@@ -1244,7 +1244,7 @@
         assert actual_origin0["url"] == origin.url
 
     def test_origin_get(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]]
 
         assert swh_storage.origin_get(origin_dict) is None
@@ -1281,18 +1281,24 @@
         return visits
 
     def test_origin_visit_get_all(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         visits = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 ),
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 ),
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 ),
             ]
         )
@@ -1351,7 +1357,7 @@
         assert [] == list(swh_storage.origin_visit_get("foo"))
 
     def test_origin_visit_get_random(self, swh_storage, sample_data):
-        origins = sample_data["origin"][:2]
+        origins = sample_data.origins[:2]
         swh_storage.origin_add(origins)
 
         # Add some random visits within the selection range
@@ -1388,7 +1394,7 @@
         assert random_origin_visit["origin"] in [o.url for o in origins]
 
     def test_origin_visit_get_random_nothing_found(self, swh_storage, sample_data):
-        origins = sample_data["origin"]
+        origins = sample_data.origins
         swh_storage.origin_add(origins)
         visit_type = "hg"
         # Add some visits outside of the random generation selection so nothing
@@ -1415,7 +1421,7 @@
         assert random_origin_visit is None
 
     def test_origin_get_by_sha1(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         assert swh_storage.origin_get(origin.to_dict()) is None
         swh_storage.origin_add([origin])
@@ -1424,14 +1430,14 @@
         assert origins[0]["url"] == origin.url
 
     def test_origin_get_by_sha1_not_found(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         assert swh_storage.origin_get(origin.to_dict()) is None
         origins = list(swh_storage.origin_get_by_sha1([sha1(origin.url)]))
         assert len(origins) == 1
         assert origins[0] is None
 
     def test_origin_search_single_result(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
 
         found_origins = list(swh_storage.origin_search(origin.url))
         assert len(found_origins) == 0
@@ -1465,7 +1471,7 @@
         assert found_origins[0] == origin2_data
 
     def test_origin_search_no_regexp(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dicts = [o.to_dict() for o in [origin, origin2]]
 
         swh_storage.origin_add([origin, origin2])
@@ -1488,7 +1494,7 @@
         assert found_origins0 != found_origins1
 
     def test_origin_search_regexp_substring(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dicts = [o.to_dict() for o in [origin, origin2]]
 
         swh_storage.origin_add([origin, origin2])
@@ -1515,7 +1521,7 @@
         assert found_origins0 != found_origins1
 
     def test_origin_search_regexp_fullstring(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dicts = [o.to_dict() for o in [origin, origin2]]
 
         swh_storage.origin_add([origin, origin2])
@@ -1542,7 +1548,7 @@
         assert found_origins0 != found_origins1
 
     def test_origin_visit_add(self, swh_storage, sample_data):
-        origin1 = sample_data["origin"][1]
+        origin1 = sample_data.origins[1]
         swh_storage.origin_add([origin1])
 
         date_visit = now()
@@ -1552,10 +1558,10 @@
         date_visit2 = round_to_milliseconds(date_visit2)
 
         visit1 = OriginVisit(
-            origin=origin1.url, date=date_visit, type=data.type_visit1,
+            origin=origin1.url, date=date_visit, type=sample_data.type_visit1,
         )
         visit2 = OriginVisit(
-            origin=origin1.url, date=date_visit2, type=data.type_visit2,
+            origin=origin1.url, date=date_visit2, type=sample_data.type_visit2,
        )
 
         # add once
@@ -1601,11 +1607,9 @@
         for obj in expected_objects:
             assert obj in actual_objects
 
-    def test_origin_visit_add_validation(self, swh_storage):
+    def test_origin_visit_add_validation(self, swh_storage, sample_data):
         """Unknown origin when adding visits should raise"""
-        visit = OriginVisit(
-            origin="something-unknown", date=now(), type=data.type_visit1,
-        )
+        visit = attr.evolve(sample_data.origin_visit, origin="something-unknown")
 
         with pytest.raises(StorageArgumentException, match="Unknown origin"):
             swh_storage.origin_visit_add([visit])
@@ -1632,18 +1636,22 @@
         """Correct origin visit statuses should add a new visit status
 
         """
-        snapshot = sample_data["snapshot"][0]
-        origin1 = sample_data["origin"][1]
+        snapshot = sample_data.snapshot
+        origin1 = sample_data.origins[1]
         origin2 = Origin(url="new-origin")
         swh_storage.origin_add([origin1, origin2])
         ov1, ov2 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin1.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 ),
                 OriginVisit(
-                    origin=origin2.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin2.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 ),
             ]
         )
@@ -1651,14 +1659,14 @@
         ovs1 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov1.visit,
-            date=data.date_visit1,
+            date=sample_data.date_visit1,
             status="created",
             snapshot=None,
         )
         ovs2 = OriginVisitStatus(
             origin=origin2.url,
             visit=ov2.visit,
-            date=data.date_visit2,
+            date=sample_data.date_visit2,
             status="created",
             snapshot=None,
         )
@@ -1718,13 +1726,15 @@
         """Correct origin visit statuses should add a new visit status
 
         """
-        snapshot = sample_data["snapshot"][0]
-        origin1 = sample_data["origin"][1]
+        snapshot = sample_data.snapshot
+        origin1 = sample_data.origins[1]
swh_storage.origin_add([origin1]) ov1 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin1.url, date=data.date_visit1, type=data.type_visit1, + origin=origin1.url, + date=sample_data.date_visit1, + type=sample_data.type_visit1, ), ] )[0] @@ -1732,7 +1742,7 @@ ovs1 = OriginVisitStatus( origin=origin1.url, visit=ov1.visit, - date=data.date_visit1, + date=sample_data.date_visit1, status="created", snapshot=None, ) @@ -1774,61 +1784,73 @@ assert obj in actual_objects def test_origin_visit_find_by_date(self, swh_storage, sample_data): - origin = sample_data["origin"][0] + origin = sample_data.origin swh_storage.origin_add([origin]) visit1 = OriginVisit( - origin=origin.url, date=data.date_visit2, type=data.type_visit1, + origin=origin.url, + date=sample_data.date_visit2, + type=sample_data.type_visit1, ) visit2 = OriginVisit( - origin=origin.url, date=data.date_visit3, type=data.type_visit2, + origin=origin.url, + date=sample_data.date_visit3, + type=sample_data.type_visit2, ) visit3 = OriginVisit( - origin=origin.url, date=data.date_visit2, type=data.type_visit3, + origin=origin.url, + date=sample_data.date_visit2, + type=sample_data.type_visit3, ) ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) ovs1 = OriginVisitStatus( origin=origin.url, visit=ov1.visit, - date=data.date_visit2, + date=sample_data.date_visit2, status="ongoing", snapshot=None, ) ovs2 = OriginVisitStatus( origin=origin.url, visit=ov2.visit, - date=data.date_visit3, + date=sample_data.date_visit3, status="ongoing", snapshot=None, ) ovs3 = OriginVisitStatus( origin=origin.url, visit=ov3.visit, - date=data.date_visit2, + date=sample_data.date_visit2, status="ongoing", snapshot=None, ) swh_storage.origin_visit_status_add([ovs1, ovs2, ovs3]) # Simple case - visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit3) + visit = swh_storage.origin_visit_find_by_date( + origin.url, sample_data.date_visit3 + ) assert visit["visit"] == ov2.visit # There are two visits at the same date, the latest must be returned - visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit2) + visit = swh_storage.origin_visit_find_by_date( + origin.url, sample_data.date_visit2 + ) assert visit["visit"] == ov3.visit - def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): - swh_storage.origin_visit_find_by_date("foo", data.date_visit2) + def test_origin_visit_find_by_date__unknown_origin(self, swh_storage, sample_data): + swh_storage.origin_visit_find_by_date("foo", sample_data.date_visit2) def test_origin_visit_get_by(self, swh_storage, sample_data): - snapshot = sample_data["snapshot"][0] - origins = sample_data["origin"][:2] + snapshot = sample_data.snapshot + origins = sample_data.origins[:2] swh_storage.origin_add(origins) origin_url, origin_url2 = [o.url for o in origins] visit = OriginVisit( - origin=origin_url, date=data.date_visit2, type=data.type_visit2, + origin=origin_url, + date=sample_data.date_visit2, + type=sample_data.type_visit2, ) origin_visit1 = swh_storage.origin_visit_add([visit])[0] @@ -1847,10 +1869,14 @@ # Add some other {origin, visit} entries visit2 = OriginVisit( - origin=origin_url, date=data.date_visit3, type=data.type_visit3, + origin=origin_url, + date=sample_data.date_visit3, + type=sample_data.type_visit3, ) visit3 = OriginVisit( - origin=origin_url2, date=data.date_visit3, type=data.type_visit3, + origin=origin_url2, + date=sample_data.date_visit3, + type=sample_data.type_visit3, ) swh_storage.origin_visit_add([visit2, visit3]) @@ -1878,8 
@@ -1878,8 +1904,8 @@
             {
                 "origin": origin_url,
                 "visit": origin_visit1.visit,
-                "date": data.date_visit2,
-                "type": data.type_visit2,
+                "date": sample_data.date_visit2,
+                "type": sample_data.type_visit2,
                 "metadata": visit1_metadata,
                 "status": "full",
                 "snapshot": snapshot.id,
@@ -1898,7 +1924,7 @@
         assert swh_storage.origin_visit_get_by("foo", 10) is None

     def test_origin_visit_get_by_no_result(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         actual_origin_visit = swh_storage.origin_visit_get_by(origin.url, 999)
         assert actual_origin_visit is None
@@ -1911,7 +1937,7 @@
         assert swh_storage.origin_visit_get_latest("unknown-origin") is None

         # unknown type
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         assert swh_storage.origin_visit_get_latest(origin.url, type="unknown") is None

@@ -1919,41 +1945,47 @@
         """Filtering origin visit get latest with filter type should be ok

         """
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         visit1 = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         visit2 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
        )
         # Add a visit with the same date as the previous one
         visit3 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )
-        assert data.type_visit1 != data.type_visit2
-        assert data.date_visit1 < data.date_visit2
+        assert sample_data.type_visit1 != sample_data.type_visit2
+        assert sample_data.date_visit1 < sample_data.date_visit2

         ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
         origin_visit1 = swh_storage.origin_visit_get_by(origin.url, ov1.visit)
         origin_visit3 = swh_storage.origin_visit_get_by(origin.url, ov3.visit)
-        assert data.type_visit1 != data.type_visit2
+        assert sample_data.type_visit1 != sample_data.type_visit2

         # Check type filter is ok
         actual_ov1 = swh_storage.origin_visit_get_latest(
-            origin.url, type=data.type_visit1,
+            origin.url, type=sample_data.type_visit1,
         )
         assert actual_ov1 == origin_visit1
         actual_ov3 = swh_storage.origin_visit_get_latest(
-            origin.url, type=data.type_visit2,
+            origin.url, type=sample_data.type_visit2,
         )
         assert actual_ov3 == origin_visit3

         new_type = "npm"
-        assert new_type not in [data.type_visit1, data.type_visit2]
+        assert new_type not in [sample_data.type_visit1, sample_data.type_visit2]

         assert (
             swh_storage.origin_visit_get_latest(
@@ -1963,19 +1995,25 @@
         )

     def test_origin_visit_get_latest(self, swh_storage, sample_data):
-        empty_snapshot, complete_snapshot = sample_data["snapshot"][1:3]
-        origin = sample_data["origin"][0]
+        empty_snapshot, complete_snapshot = sample_data.snapshots[1:3]
+        origin = sample_data.origin

         swh_storage.origin_add([origin])
         visit1 = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         visit2 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )
         # Add a visit with the same date as the previous one
         visit3 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )

         ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
@@ -2110,8 +2148,8 @@
         } == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)

     def test_origin_visit_status_get_latest(self, swh_storage, sample_data):
-        snapshot = sample_data["snapshot"][2]
-        origin1 = sample_data["origin"][0]
+        snapshot = sample_data.snapshots[2]
+        origin1 = sample_data.origin
         swh_storage.origin_add([origin1])

         # to have some reference visits
@@ -2119,10 +2157,14 @@
         ov1, ov2 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin1.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 ),
                 OriginVisit(
-                    origin=origin1.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin1.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 ),
             ]
         )
@@ -2130,27 +2172,28 @@
         date_now = now()
         date_now = round_to_milliseconds(date_now)
-        assert data.date_visit1 < data.date_visit2
-        assert data.date_visit2 < date_now
+        assert sample_data.date_visit1 < sample_data.date_visit2
+        assert sample_data.date_visit2 < date_now

         ovs1 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov1.visit,
-            date=data.date_visit1,
+            date=sample_data.date_visit1,
             status="partial",
             snapshot=None,
         )
         ovs2 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov1.visit,
-            date=data.date_visit2,
+            date=sample_data.date_visit2,
             status="ongoing",
             snapshot=None,
         )
         ovs3 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov2.visit,
-            date=data.date_visit2 + datetime.timedelta(minutes=1),  # to not be ignored
+            date=sample_data.date_visit2
+            + datetime.timedelta(minutes=1),  # to not be ignored
             status="ongoing",
             snapshot=None,
         )
@@ -2217,7 +2260,7 @@
         assert actual_origin_visit3 == ovs3

     def test_person_fullname_unicity(self, swh_storage, sample_data):
-        revision, rev2 = sample_data["revision"][0:2]
+        revision, rev2 = sample_data.revisions[0:2]
         # create a revision with same committer fullname but wo name and email
         revision2 = attr.evolve(
             rev2,
@@ -2235,15 +2278,17 @@
         assert revisions[0]["committer"] == revisions[1]["committer"]

     def test_snapshot_add_get_empty(self, swh_storage, sample_data):
-        empty_snapshot = sample_data["snapshot"][1]
+        empty_snapshot = sample_data.snapshots[1]
         empty_snapshot_dict = empty_snapshot.to_dict()

-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         ov1 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 )
             ]
         )[0]
@@ -2274,7 +2319,7 @@
         ovs1 = OriginVisitStatus.from_dict(
             {
                 "origin": origin.url,
-                "date": data.date_visit1,
+                "date": sample_data.date_visit1,
                 "visit": ov1.visit,
                 "status": "created",
                 "snapshot": None,
@@ -2304,13 +2349,15 @@
             assert obj in actual_objects

     def test_snapshot_add_get_complete(self, swh_storage, sample_data):
-        complete_snapshot = sample_data["snapshot"][2]
+        complete_snapshot = sample_data.snapshots[2]
         complete_snapshot_dict = complete_snapshot.to_dict()
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin

         swh_storage.origin_add([origin])
         visit = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         origin_visit1 = swh_storage.origin_visit_add([visit])[0]
         visit_id = origin_visit1.visit
@@ -2336,7 +2383,7 @@
         assert by_ov == {**complete_snapshot_dict, "next_branch": None}

     def test_snapshot_add_many(self, swh_storage, sample_data):
-        snapshot, _, complete_snapshot = sample_data["snapshot"][:3]
+        snapshot, _, complete_snapshot = sample_data.snapshots[:3]

         actual_result = swh_storage.snapshot_add([snapshot, complete_snapshot])
         assert actual_result == {"snapshot:add": 2}
@@ -2355,7 +2402,7 @@
         assert swh_storage.stat_counters()["snapshot"] == 2

     def test_snapshot_add_many_from_generator(self, swh_storage, sample_data):
-        snapshot, _, complete_snapshot = sample_data["snapshot"][:3]
+        snapshot, _, complete_snapshot = sample_data.snapshots[:3]

         def _snp_gen():
             yield from [snapshot, complete_snapshot]
@@ -2367,7 +2414,7 @@
         assert swh_storage.stat_counters()["snapshot"] == 2

     def test_snapshot_add_many_incremental(self, swh_storage, sample_data):
-        snapshot, _, complete_snapshot = sample_data["snapshot"][:3]
+        snapshot, _, complete_snapshot = sample_data.snapshots[:3]

         actual_result = swh_storage.snapshot_add([complete_snapshot])
         assert actual_result == {"snapshot:add": 1}
@@ -2386,7 +2433,7 @@
         }

     def test_snapshot_add_twice(self, swh_storage, sample_data):
-        snapshot, empty_snapshot = sample_data["snapshot"][:2]
+        snapshot, empty_snapshot = sample_data.snapshots[:2]

         actual_result = swh_storage.snapshot_add([empty_snapshot])
         assert actual_result == {"snapshot:add": 1}
@@ -2404,7 +2451,7 @@
         ]

     def test_snapshot_add_count_branches(self, swh_storage, sample_data):
-        complete_snapshot = sample_data["snapshot"][2]
+        complete_snapshot = sample_data.snapshots[2]

         actual_result = swh_storage.snapshot_add([complete_snapshot])
         assert actual_result == {"snapshot:add": 1}
@@ -2423,7 +2470,7 @@
         assert snp_size == expected_snp_size

     def test_snapshot_add_get_paginated(self, swh_storage, sample_data):
-        complete_snapshot = sample_data["snapshot"][2]
+        complete_snapshot = sample_data.snapshots[2]

         swh_storage.snapshot_add([complete_snapshot])

@@ -2471,12 +2518,14 @@
         assert snapshot == expected_snapshot

     def test_snapshot_add_get_filtered(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        complete_snapshot = sample_data["snapshot"][2]
+        origin = sample_data.origin
+        complete_snapshot = sample_data.snapshots[2]

         swh_storage.origin_add([origin])
         visit = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         origin_visit1 = swh_storage.origin_visit_add([visit])[0]

@@ -2527,7 +2576,7 @@
         assert snapshot == expected_snapshot

     def test_snapshot_add_get_filtered_and_paginated(self, swh_storage, sample_data):
-        complete_snapshot = sample_data["snapshot"][2]
+        complete_snapshot = sample_data.snapshots[2]

         swh_storage.snapshot_add([complete_snapshot])

@@ -2596,7 +2645,7 @@
         assert snapshot == expected_snapshot

     def test_snapshot_add_get_branch_by_type(self, swh_storage, sample_data):
-        complete_snapshot = sample_data["snapshot"][2]
+        complete_snapshot = sample_data.snapshots[2]
         snapshot = complete_snapshot.to_dict()

         alias1 = b"alias1"
@@ -2628,12 +2677,14 @@
         assert alias1 in branches

     def test_snapshot_add_get(self, swh_storage, sample_data):
-        snapshot = sample_data["snapshot"][0]
-        origin = sample_data["origin"][0]
+        snapshot = sample_data.snapshot
+        origin = sample_data.origin

         swh_storage.origin_add([origin])
         visit = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         origin_visit1 = swh_storage.origin_visit_add([visit])[0]
         visit_id = origin_visit1.visit
@@ -2663,14 +2714,16 @@
         assert origin_visit_info["snapshot"] == snapshot.id

     def test_snapshot_add_twice__by_origin_visit(self, swh_storage, sample_data):
-        snapshot = sample_data["snapshot"][0]
-        origin = sample_data["origin"][0]
+        snapshot = sample_data.snapshot
+        origin = sample_data.origin

         swh_storage.origin_add([origin])
         ov1 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 )
             ]
         )[0]
@@ -2697,7 +2750,9 @@
         ov2 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 )
             ]
         )[0]
@@ -2721,7 +2776,7 @@
         ovs1 = OriginVisitStatus.from_dict(
             {
                 "origin": origin.url,
-                "date": data.date_visit1,
+                "date": sample_data.date_visit1,
                 "visit": ov1.visit,
                 "status": "created",
                 "metadata": None,
@@ -2741,7 +2796,7 @@
         ovs3 = OriginVisitStatus.from_dict(
             {
                 "origin": origin.url,
-                "date": data.date_visit2,
+                "date": sample_data.date_visit2,
                 "visit": ov2.visit,
                 "status": "created",
                 "metadata": None,
@@ -2774,7 +2829,7 @@
             assert obj in actual_objects

     def test_snapshot_get_random(self, swh_storage, sample_data):
-        snapshot, empty_snapshot, complete_snapshot = sample_data["snapshot"][:3]
+        snapshot, empty_snapshot, complete_snapshot = sample_data.snapshots[:3]
         swh_storage.snapshot_add([snapshot, empty_snapshot, complete_snapshot])

         assert swh_storage.snapshot_get_random() in {
@@ -2784,7 +2839,7 @@
         }

     def test_snapshot_missing(self, swh_storage, sample_data):
-        snapshot, missing_snapshot = sample_data["snapshot"][:2]
+        snapshot, missing_snapshot = sample_data.snapshots[:2]
         snapshots = [snapshot.id, missing_snapshot.id]
         swh_storage.snapshot_add([snapshot])

@@ -2793,12 +2848,12 @@
         assert list(missing_snapshots) == [missing_snapshot.id]

     def test_stat_counters(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        snapshot = sample_data["snapshot"][0]
-        revision = sample_data["revision"][0]
-        release = sample_data["release"][0]
-        directory = sample_data["directory"][0]
-        content = sample_data["content"][0]
+        origin = sample_data.origin
+        snapshot = sample_data.snapshot
+        revision = sample_data.revision
+        release = sample_data.release
+        directory = sample_data.directory
+        content = sample_data.content

         expected_keys = ["content", "directory", "origin", "revision"]
@@ -2827,7 +2882,9 @@
         swh_storage.origin_add([origin])
         visit = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )
         origin_visit1 = swh_storage.origin_visit_add([visit])[0]
@@ -2861,7 +2918,7 @@
         assert counters["person"] == 3

     def test_content_find_ctime(self, swh_storage, sample_data):
-        origin_content = sample_data["content"][0]
+        origin_content = sample_data.content
         ctime = round_to_milliseconds(now())
         content = attr.evolve(origin_content, data=None, ctime=ctime)
         swh_storage.content_add_metadata([content])
@@ -2870,7 +2927,7 @@
         assert actually_present[0] == content.to_dict()

     def test_content_find_with_present_content(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content
         expected_content = content.to_dict()
         del expected_content["data"]
         del expected_content["ctime"]
@@ -2902,7 +2959,7 @@
         assert actually_present[0] == expected_content

     def test_content_find_with_non_present_content(self, swh_storage, sample_data):
-        missing_content = sample_data["skipped_content"][0]
+        missing_content = sample_data.skipped_content
         # 1. with something that does not exist
         actually_present = swh_storage.content_find({"sha1": missing_content.sha1})
@@ -2919,7 +2976,7 @@
         assert actually_present == []

     def test_content_find_with_duplicate_input(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         # Create fake data with colliding sha256 and blake2s256
         sha1_array = bytearray(content.sha1)
@@ -2959,7 +3016,7 @@
             assert result in actual_result

     def test_content_find_with_duplicate_sha256(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         hashes = {}
         # Create fake data with colliding sha256
@@ -3015,7 +3072,7 @@
         assert actual_result == [expected_duplicated_content]

     def test_content_find_with_duplicate_blake2s256(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         # Create fake data with colliding sha256 and blake2s256
         sha1_array = bytearray(content.sha1)
@@ -3077,10 +3134,10 @@
             swh_storage.content_find({"unknown-sha1": "something"})  # not the right key

     def test_object_find_by_sha1_git(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        directory = sample_data["directory"][0]
-        revision = sample_data["revision"][0]
-        release = sample_data["release"][0]
+        content = sample_data.content
+        directory = sample_data.directory
+        revision = sample_data.revision
+        release = sample_data.release

         sha1_gits = [b"00000000000000000000"]
         expected = {
@@ -3111,7 +3168,7 @@
         assert expected == ret

     def test_metadata_fetcher_add_get(self, swh_storage, sample_data):
-        fetcher = sample_data["fetcher"][0]
+        fetcher = sample_data.metadata_fetcher
         actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
         assert actual_fetcher is None  # does not exist
@@ -3121,7 +3178,7 @@
         assert res == fetcher

     def test_metadata_authority_add_get(self, swh_storage, sample_data):
-        authority = sample_data["authority"][0]
+        authority = sample_data.metadata_authority

         actual_authority = swh_storage.metadata_authority_get(
             authority.type, authority.url
@@ -3134,10 +3191,10 @@
         assert res == authority

     def test_content_metadata_add(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata = sample_data.content_metadata[:2]

         content_swhid = SWHID(
             object_type="content", object_id=hash_to_bytes(content.sha1_git)
@@ -3158,10 +3215,10 @@

     def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
         """Duplicates should be silently updated."""
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         content_swhid = SWHID(
             object_type="content", object_id=hash_to_bytes(content.sha1_git)
         )
@@ -3190,12 +3247,14 @@
         )

     def test_content_metadata_get(self, swh_storage, sample_data):
-        content, content2 = sample_data["content"][:2]
-        fetcher, fetcher2 = sample_data["fetcher"][:2]
-        authority, authority2 = sample_data["authority"][:2]
-        content1_metadata1, content1_metadata2, content1_metadata3 = sample_data[
-            "content_metadata"
-        ][:3]
+        content, content2 = sample_data.contents[:2]
+        fetcher, fetcher2 = sample_data.fetchers[:2]
+        authority, authority2 = sample_data.authorities[:2]
+        (
+            content1_metadata1,
+            content1_metadata2,
+            content1_metadata3,
+        ) = sample_data.content_metadata[:3]

         content1_swhid = SWHID(object_type="content", object_id=content.sha1_git)
         content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git)
@@ -3236,10 +3295,10 @@
         assert [content2_metadata] == list(result["results"],)

     def test_content_metadata_get_after(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
@@ -3278,10 +3337,10 @@
         assert result["results"] == []

     def test_content_metadata_get_paginate(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
@@ -3309,10 +3368,10 @@
         assert result["results"] == [content_metadata2]

     def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher1, fetcher2 = sample_data["fetcher"][:2]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher1, fetcher2 = sample_data.fetchers[:2]
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
@@ -3344,10 +3403,10 @@
         assert result["results"] == [new_content_metadata2]

     def test_content_metadata_get__invalid_id(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         swh_storage.metadata_fetcher_add([fetcher])
         swh_storage.metadata_authority_add([authority])
@@ -3359,10 +3418,10 @@
         )

     def test_origin_metadata_add(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]

         assert swh_storage.origin_add([origin]) == {"origin:add": 1}
@@ -3382,10 +3441,10 @@

     def test_origin_metadata_add_duplicate(self, swh_storage, sample_data):
         """Duplicates should be silently updated."""
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}

         new_origin_metadata2 = attr.evolve(
@@ -3413,12 +3472,14 @@
         )

     def test_origin_metadata_get(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
-        fetcher, fetcher2 = sample_data["fetcher"][:2]
-        authority, authority2 = sample_data["authority"][:2]
-        origin1_metadata1, origin1_metadata2, origin1_metadata3 = sample_data[
-            "origin_metadata"
-        ][:3]
+        origin, origin2 = sample_data.origins[:2]
+        fetcher, fetcher2 = sample_data.fetchers[:2]
+        authority, authority2 = sample_data.authorities[:2]
+        (
+            origin1_metadata1,
+            origin1_metadata2,
+            origin1_metadata3,
+        ) = sample_data.origin_metadata[:3]

         assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2}
@@ -3454,10 +3515,10 @@
         assert [origin2_metadata] == list(result["results"],)

     def test_origin_metadata_get_after(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]

         assert swh_storage.origin_add([origin]) == {"origin:add": 1}
@@ -3496,10 +3557,10 @@
         assert result["results"] == []

     def test_origin_metadata_get_paginate(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}

         swh_storage.metadata_fetcher_add([fetcher])
@@ -3528,10 +3589,10 @@
         assert result["results"] == [origin_metadata2]

     def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher1, fetcher2 = sample_data["fetcher"][:2]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher1, fetcher2 = sample_data.fetchers[:2]
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}

         swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
@@ -3562,9 +3623,9 @@
         assert result["results"] == [new_origin_metadata2]

     def test_origin_metadata_add_missing_authority(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}

         swh_storage.metadata_fetcher_add([fetcher])
@@ -3573,9 +3634,9 @@
             swh_storage.object_metadata_add([origin_metadata, origin_metadata2])

     def test_origin_metadata_add_missing_fetcher(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}

         swh_storage.metadata_authority_add([authority])
@@ -3584,11 +3645,11 @@
             swh_storage.object_metadata_add([origin_metadata, origin_metadata2])

     def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        authority = sample_data["authority"][0]
-        fetcher = sample_data["fetcher"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
-        content_metadata = sample_data["content_metadata"][0]
+        origin = sample_data.origin
+        authority = sample_data.metadata_authority
+        fetcher = sample_data.metadata_fetcher
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
+        content_metadata = sample_data.content_metadata[0]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}

         swh_storage.metadata_fetcher_add([fetcher])
@@ -3788,7 +3849,7 @@
         assert sorted(returned_origins) == sorted(expected_origins)

     def test_origin_count(self, swh_storage, sample_data):
-        swh_storage.origin_add(sample_data["origin"])
+        swh_storage.origin_add(sample_data.origins)

         assert swh_storage.origin_count("github") == 3
         assert swh_storage.origin_count("gitlab") == 2
@@ -3798,7 +3859,7 @@
         assert swh_storage.origin_count(".*user1.*", regexp=False) == 0

     def test_origin_count_with_visit_no_visits(self, swh_storage, sample_data):
-        swh_storage.origin_add(sample_data["origin"])
+        swh_storage.origin_add(sample_data.origins)

         # none of them have visits, so with_visit=True => 0
         assert swh_storage.origin_count("github", with_visit=True) == 0
@@ -3811,7 +3872,7 @@
     def test_origin_count_with_visit_with_visits_no_snapshot(
         self, swh_storage, sample_data
     ):
-        swh_storage.origin_add(sample_data["origin"])
+        swh_storage.origin_add(sample_data.origins)

         origin_url = "https://github.com/user1/repo1"
         visit = OriginVisit(origin=origin_url, date=now(), type="git",)
@@ -3837,8 +3898,8 @@
     def test_origin_count_with_visit_with_visits_and_snapshot(
         self, swh_storage, sample_data
     ):
-        snapshot = sample_data["snapshot"][0]
-        swh_storage.origin_add(sample_data["origin"])
+        snapshot = sample_data.snapshot
+        swh_storage.origin_add(sample_data.origins)
         swh_storage.snapshot_add([snapshot])

         origin_url = "https://github.com/user1/repo1"
@@ -3892,7 +3953,7 @@
     # This test is only relevant on the local storage, with an actual
     # objstorage raising an exception
     def test_content_add_objstorage_exception(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         swh_storage.objstorage.content_add = Mock(
             side_effect=Exception("mocked broken objstorage")
@@ -3909,7 +3970,7 @@
 class TestStorageRaceConditions:
     @pytest.mark.xfail
     def test_content_add_race(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         results = queue.Queue()
@@ -3951,7 +4012,7 @@
     """

     def test_content_update_with_new_cols(self, swh_storage, sample_data):
-        content, content2 = sample_data["content"][:2]
+        content, content2 = sample_data.contents[:2]

         swh_storage.journal_writer.journal = None  # TODO, not supported
@@ -3996,7 +4057,7 @@
         )

     def test_content_add_db(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         actual_result = swh_storage.content_add([content])
@@ -4033,7 +4094,7 @@
         assert contents[0] == attr.evolve(content, data=None)

     def test_content_add_metadata_db(self, swh_storage, sample_data):
-        content = attr.evolve(sample_data["content"][0], data=None, ctime=now())
+        content = attr.evolve(sample_data.content, data=None, ctime=now())

         actual_result = swh_storage.content_add_metadata([content])
@@ -4067,7 +4128,7 @@
         assert contents[0] == content

     def test_skipped_content_add_db(self, swh_storage, sample_data):
-        content, cont2 = sample_data["skipped_content"][:2]
+        content, cont2 = sample_data.skipped_contents[:2]
         content2 = attr.evolve(cont2, blake2s256=None)

         actual_result = swh_storage.skipped_content_add([content, content, content2])
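
Note on the fixture shape relied upon throughout this diff: tests read plural
attributes (sample_data.origins, sample_data.snapshots, sample_data.fetchers,
sample_data.authorities, sample_data.origin_metadata, ...) when they need
several objects, and singular attributes (sample_data.origin,
sample_data.snapshot, sample_data.metadata_fetcher,
sample_data.metadata_authority, ...) as shortcuts for a single object. A
minimal sketch of that access pattern follows; the values are hypothetical
stand-ins, not the real StorageData model objects:

    import datetime

    class SampleDataSketch:
        # Plural attributes hold tuples of sample objects (stand-in strings
        # here; the real fixture holds swh.model objects).
        origins = ("https://example.com/repo1", "https://example.com/repo2")
        # Reference dates/types used by the visit-related tests; those tests
        # only assume date_visit1 < date_visit2 and distinct visit types.
        date_visit1 = datetime.datetime(2015, 1, 1, tzinfo=datetime.timezone.utc)
        date_visit2 = datetime.datetime(2017, 1, 1, tzinfo=datetime.timezone.utc)
        type_visit1 = "git"
        type_visit2 = "hg"

        @property
        def origin(self):
            # Singular shortcut: the first element of the plural tuple.
            return self.origins[0]

    sample_data = SampleDataSketch()
    assert sample_data.origin == sample_data.origins[0]
    assert sample_data.date_visit1 < sample_data.date_visit2
    assert sample_data.type_visit1 != sample_data.type_visit2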