diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py
--- a/swh/storage/pytest_plugin.py
+++ b/swh/storage/pytest_plugin.py
@@ -6,7 +6,7 @@
 import glob
 from os import path, environ
-from typing import Dict, Tuple, Union
+from typing import Union

 import pytest

@@ -16,9 +16,9 @@
 from pytest_postgresql.janitor import DatabaseJanitor, psycopg2, Version

 from swh.core.utils import numfile_sortkey as sortkey
-from swh.model.model import BaseModel
 from swh.storage import get_storage
-from swh.storage.tests.storage_data import data
+
+from swh.storage.tests.storage_data import StorageData

 SQL_DIR = path.join(path.dirname(swh.storage.__file__), "sql")

@@ -188,25 +188,13 @@

 @pytest.fixture
-def sample_data() -> Dict[str, Tuple[BaseModel, ...]]:
+def sample_data() -> StorageData:
     """Pre-defined sample storage object data to manipulate

     Returns:
-        Dict of data model objects (keys: content, directory, revision, release, person,
-        origin)
+        StorageData whose attributes are data model objects: plural attributes
+        (contents, directories, revisions, releases, ...) hold tuples of
+        objects, singular ones (content, directory, revision, release, ...) a
+        single object.

     """
-    return {
-        "content": data.contents,
-        "skipped_content": data.skipped_contents,
-        "directory": data.directories,
-        "revision": data.revisions,
-        "release": data.releases,
-        "snapshot": data.snapshots,
-        "origin": data.origins,
-        "origin_visit": data.origin_visits,
-        "fetcher": data.fetchers,
-        "authority": data.authorities,
-        "origin_metadata": data.origin_metadata,
-        "content_metadata": data.content_metadata,
-    }
+    return StorageData()
diff --git a/swh/storage/tests/algos/test_origin.py b/swh/storage/tests/algos/test_origin.py
--- a/swh/storage/tests/algos/test_origin.py
+++ b/swh/storage/tests/algos/test_origin.py
@@ -13,7 +13,6 @@
 from swh.storage.utils import now

 from swh.storage.tests.test_storage import round_to_milliseconds
-from swh.storage.tests.storage_data import data


 def assert_list_eq(left, right, msg=None):
@@ -92,8 +91,8 @@
     assert origin_get_latest_visit_status(swh_storage, "unknown-origin") is None

     # unknown type so no result
-    origin = sample_data["origin"][0]
-    origin_visit = sample_data["origin_visit"][0]
+    origin = sample_data.origin
+    origin_visit = sample_data.origin_visit
     assert origin_visit.origin == origin.url

     swh_storage.origin_add([origin])
@@ -119,17 +118,21 @@
     """Initialize storage with origin/origin-visit/origin-visit-status

     """
-    snapshot = sample_data["snapshot"][2]
-    origin1, origin2 = sample_data["origin"][:2]
+    snapshot = sample_data.snapshots[2]
+    origin1, origin2 = sample_data.origins[:2]

     swh_storage.origin_add([origin1, origin2])

     ov1, ov2 = swh_storage.origin_visit_add(
         [
             OriginVisit(
-                origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
+                origin=origin1.url,
+                date=sample_data.date_visit1,
+                type=sample_data.type_visit1,
             ),
             OriginVisit(
-                origin=origin2.url, date=data.date_visit2, type=data.type_visit2,
+                origin=origin2.url,
+                date=sample_data.date_visit2,
+                type=sample_data.type_visit2,
             ),
         ]
     )
@@ -138,14 +141,14 @@
     date_now = now()
     date_now = round_to_milliseconds(date_now)
-    assert data.date_visit1 < data.date_visit2
-    assert data.date_visit2 < date_now
+    assert sample_data.date_visit1 < sample_data.date_visit2
+    assert sample_data.date_visit2 < date_now

     # origin visit status 1 for origin visit 1
     ovs11 = OriginVisitStatus(
         origin=origin1.url,
         visit=ov1.visit,
-        date=data.date_visit1,
+        date=sample_data.date_visit1,
         status="partial",
         snapshot=None,
     )
@@ -153,7 +156,7 @@
     ovs12 = OriginVisitStatus(
         origin=origin1.url,
         visit=ov1.visit,
-        date=data.date_visit2,
+        date=sample_data.date_visit2,
         status="ongoing",
         snapshot=None,
     )
@@ -161,7 +164,7 @@
     ovs21 = OriginVisitStatus(
         origin=origin2.url,
         visit=ov2.visit,
-        date=data.date_visit2,
+        date=sample_data.date_visit2,
         status="ongoing",
         snapshot=None,
     )
@@ -194,36 +197,40 @@

     # no visit for origin1 url with type_visit2
     assert (
-        origin_get_latest_visit_status(swh_storage, origin1.url, type=data.type_visit2)
+        origin_get_latest_visit_status(
+            swh_storage, origin1.url, type=sample_data.type_visit2
+        )
         is None
     )

     # no visit for origin2 url with type_visit1
     assert (
-        origin_get_latest_visit_status(swh_storage, origin2.url, type=data.type_visit1)
+        origin_get_latest_visit_status(
+            swh_storage, origin2.url, type=sample_data.type_visit1
+        )
         is None
     )

     # Two visits, both with no snapshot, take the most recent
     actual_ov1, actual_ovs12 = origin_get_latest_visit_status(
-        swh_storage, origin1.url, type=data.type_visit1
+        swh_storage, origin1.url, type=sample_data.type_visit1
     )
     assert isinstance(actual_ov1, OriginVisit)
     assert isinstance(actual_ovs12, OriginVisitStatus)
     assert actual_ov1.origin == ov1.origin
     assert actual_ov1.visit == ov1.visit
-    assert actual_ov1.type == data.type_visit1
+    assert actual_ov1.type == sample_data.type_visit1
     assert actual_ovs12 == ovs12

     # take the most recent visit with type_visit2
     actual_ov2, actual_ovs22 = origin_get_latest_visit_status(
-        swh_storage, origin2.url, type=data.type_visit2
+        swh_storage, origin2.url, type=sample_data.type_visit2
     )
     assert isinstance(actual_ov2, OriginVisit)
     assert isinstance(actual_ovs22, OriginVisitStatus)
     assert actual_ov2.origin == ov2.origin
     assert actual_ov2.visit == ov2.visit
-    assert actual_ov2.type == data.type_visit2
+    assert actual_ov2.type == sample_data.type_visit2
     assert actual_ovs22 == ovs22

@@ -247,7 +254,7 @@
     )
     assert actual_ov1.origin == ov1.origin
     assert actual_ov1.visit == ov1.visit
-    assert actual_ov1.type == data.type_visit1
+    assert actual_ov1.type == sample_data.type_visit1
     assert actual_ovs11 == ovs11

     # both status exist, take the latest one
@@ -256,14 +263,14 @@
     )
     assert actual_ov1.origin == ov1.origin
     assert actual_ov1.visit == ov1.visit
-    assert actual_ov1.type == data.type_visit1
+    assert actual_ov1.type == sample_data.type_visit1
     assert actual_ovs12 == ovs12

     assert isinstance(actual_ov1, OriginVisit)
     assert isinstance(actual_ovs12, OriginVisitStatus)
     assert actual_ov1.origin == ov1.origin
     assert actual_ov1.visit == ov1.visit
-    assert actual_ov1.type == data.type_visit1
+    assert actual_ov1.type == sample_data.type_visit1
     assert actual_ovs12 == ovs12

     # take the most recent visit with type_visit2
@@ -272,7 +279,7 @@
     )
     assert actual_ov2.origin == ov2.origin
     assert actual_ov2.visit == ov2.visit
-    assert actual_ov2.type == data.type_visit2
+    assert actual_ov2.type == sample_data.type_visit2
     assert actual_ovs22 == ovs22

@@ -301,7 +308,7 @@

     # Add another visit
     swh_storage.origin_visit_add(
-        [OriginVisit(origin=origin2.url, date=date_now, type=data.type_visit2,),]
+        [OriginVisit(origin=origin2.url, date=date_now, type=sample_data.type_visit2,),]
     )

     # Requiring the latest visit with a snapshot, we still find the previous visit
diff --git a/swh/storage/tests/algos/test_snapshot.py b/swh/storage/tests/algos/test_snapshot.py
--- a/swh/storage/tests/algos/test_snapshot.py
+++ b/swh/storage/tests/algos/test_snapshot.py
@@ -53,9 +53,9 @@
     assert snapshot_get_latest(swh_storage, "unknown-origin") is None

     # no snapshot on origin visit so None
-    origin = sample_data["origin"][0]
+    origin = sample_data.origin
     swh_storage.origin_add([origin])
-    origin_visit, origin_visit2 = sample_data["origin_visit"][:2]
+    origin_visit, origin_visit2 = sample_data.origin_visits[:2]
     assert origin_visit.origin == origin.url

     swh_storage.origin_visit_add([origin_visit])
@@ -67,7 +67,7 @@

     # visit references a snapshot but the snapshot does not exist in backend for some
     # reason
-    complete_snapshot = sample_data["snapshot"][2]
+    complete_snapshot = sample_data.snapshots[2]
     swh_storage.origin_visit_status_add(
         [
             OriginVisitStatus(
@@ -85,10 +85,10 @@

 def test_snapshot_get_latest(swh_storage, sample_data):
-    origin = sample_data["origin"][0]
+    origin = sample_data.origin
     swh_storage.origin_add([origin])

-    visit1, visit2 = sample_data["origin_visit"][:2]
+    visit1, visit2 = sample_data.origin_visits[:2]
     assert visit1.origin == origin.url

     swh_storage.origin_visit_add([visit1])
@@ -96,7 +96,7 @@
     visit_id = ov1["visit"]

     # Add snapshot to visit1, latest snapshot = visit 1 snapshot
-    complete_snapshot = sample_data["snapshot"][2]
+    complete_snapshot = sample_data.snapshots[2]
     swh_storage.snapshot_add([complete_snapshot])

     swh_storage.origin_visit_status_add(
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -7,6 +7,8 @@

 import attr

+from typing import Tuple
+
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
 from swh.model import from_disk
 from swh.model.identifiers import parse_swhid
@@ -36,531 +38,519 @@


 class StorageData:
-    def __getattr__(self, key):
-        try:
-            v = globals()[key]
-        except KeyError as e:
-            raise AttributeError(e.args[0])
-        if hasattr(v, "copy"):
-            return v.copy()
-        return v
-
-
-data = StorageData()
-
-
-content = Content(
-    data=b"42\n",
-    length=3,
-    sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
-    sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
-    sha256=hash_to_bytes(
-        "673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a"
-    ),
-    blake2s256=hash_to_bytes(
-        "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d"
-    ),
-    status="visible",
-)
-
-content2 = Content(
-    data=b"4242\n",
-    length=5,
-    sha1=hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"),
-    sha1_git=hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"),
-    sha256=hash_to_bytes(
-        "859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd"
-    ),
-    blake2s256=hash_to_bytes(
-        "849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d"
-    ),
-    status="visible",
-)
-
-content3 = Content(
-    data=b"424242\n",
-    length=7,
-    sha1=hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"),
-    sha1_git=hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"),
-    sha256=hash_to_bytes(
-        "92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36"
-    ),
-    blake2s256=hash_to_bytes(
-        "76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11"
-    ),
-    status="visible",
-    ctime=datetime.datetime(2019, 12, 1, tzinfo=datetime.timezone.utc),
-)
-
-contents = (content, content2, content3)
-
-
-skipped_content = SkippedContent(
-    length=1024 * 1024 * 200,
-    sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"),
-    sha1=hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"),
-    sha256=hash_to_bytes(
-        "7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
-    ),
-    blake2s256=hash_to_bytes(
-        "ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b"
-    ),
-    reason="Content too long",
status="absent", - origin="file:///dev/zero", -) - -skipped_content2 = SkippedContent( - length=1024 * 1024 * 300, - sha1_git=hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"), - sha1=hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"), - sha256=hash_to_bytes( - "8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a" - ), - blake2s256=hash_to_bytes( - "9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b" - ), - reason="Content too long", - status="absent", -) + """Data model objects to use within tests. -skipped_contents = (skipped_content, skipped_content2) + """ -directory5 = Directory(entries=()) - -directory = Directory( - id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"), - entries=tuple( - [ - DirectoryEntry( - name=b"foo", - type="file", - target=content.sha1_git, - perms=from_disk.DentryPerms.content, + content = Content( + data=b"42\n", + length=3, + sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"), + sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), + sha256=hash_to_bytes( + "673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a" + ), + blake2s256=hash_to_bytes( + "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d" + ), + status="visible", + ) + content2 = Content( + data=b"4242\n", + length=5, + sha1=hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"), + sha1_git=hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"), + sha256=hash_to_bytes( + "859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd" + ), + blake2s256=hash_to_bytes( + "849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d" + ), + status="visible", + ) + content3 = Content( + data=b"424242\n", + length=7, + sha1=hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"), + sha1_git=hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"), + sha256=hash_to_bytes( + "92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36" + ), + blake2s256=hash_to_bytes( + "76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11" + ), + status="visible", + ctime=datetime.datetime(2019, 12, 1, tzinfo=datetime.timezone.utc), + ) + contents: Tuple[Content, ...] = (content, content2, content3) + + skipped_content = SkippedContent( + length=1024 * 1024 * 200, + sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"), + sha1=hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"), + sha256=hash_to_bytes( + "7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a" + ), + blake2s256=hash_to_bytes( + "ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b" + ), + reason="Content too long", + status="absent", + origin="file:///dev/zero", + ) + skipped_content2 = SkippedContent( + length=1024 * 1024 * 300, + sha1_git=hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"), + sha1=hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"), + sha256=hash_to_bytes( + "8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a" + ), + blake2s256=hash_to_bytes( + "9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b" + ), + reason="Content too long", + status="absent", + ) + skipped_contents: Tuple[SkippedContent, ...] 
= (skipped_content, skipped_content2) + + directory5 = Directory(entries=()) + directory = Directory( + id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"), + entries=tuple( + [ + DirectoryEntry( + name=b"foo", + type="file", + target=content.sha1_git, + perms=from_disk.DentryPerms.content, + ), + DirectoryEntry( + name=b"bar\xc3", + type="dir", + target=directory5.id, + perms=from_disk.DentryPerms.directory, + ), + ], + ), + ) + directory2 = Directory( + id=hash_to_bytes("8505808532953da7d2581741f01b29c04b1cb9ab"), + entries=tuple( + [ + DirectoryEntry( + name=b"oof", + type="file", + target=content2.sha1_git, + perms=from_disk.DentryPerms.content, + ) + ], + ), + ) + directory3 = Directory( + id=hash_to_bytes("4ea8c6b2f54445e5dd1a9d5bb2afd875d66f3150"), + entries=tuple( + [ + DirectoryEntry( + name=b"foo", + type="file", + target=content.sha1_git, + perms=from_disk.DentryPerms.content, + ), + DirectoryEntry( + name=b"subdir", + type="dir", + target=directory.id, + perms=from_disk.DentryPerms.directory, + ), + DirectoryEntry( + name=b"hello", + type="file", + target=directory5.id, + perms=from_disk.DentryPerms.content, + ), + ], + ), + ) + directory4 = Directory( + id=hash_to_bytes("377aa5fcd944fbabf502dbfda55cd14d33c8c3c6"), + entries=tuple( + [ + DirectoryEntry( + name=b"subdir1", + type="dir", + target=directory3.id, + perms=from_disk.DentryPerms.directory, + ) + ], + ), + ) + directories: Tuple[Directory, ...] = ( + directory2, + directory, + directory3, + directory4, + directory5, + ) + + minus_offset = datetime.timezone(datetime.timedelta(minutes=-120)) + plus_offset = datetime.timezone(datetime.timedelta(minutes=120)) + + revision = Revision( + id=hash_to_bytes("066b1b62dbfa033362092af468bf6cfabec230e7"), + message=b"hello", + author=Person( + name=b"Nicolas Dandrimont", + email=b"nicolas@example.com", + fullname=b"Nicolas Dandrimont ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567890, microseconds=0), + offset=120, + negative_utc=False, + ), + committer=Person( + name=b"St\xc3fano Zacchiroli", + email=b"stefano@example.com", + fullname=b"St\xc3fano Zacchiroli ", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1123456789, microseconds=0), + offset=120, + negative_utc=False, + ), + parents=(), + type=RevisionType.GIT, + directory=directory.id, + metadata={ + "checksums": {"sha1": "tarball-sha1", "sha256": "tarball-sha256",}, + "signed-off-by": "some-dude", + }, + extra_headers=( + (b"gpgsig", b"test123"), + (b"mergetag", b"foo\\bar"), + (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"), + ), + synthetic=True, + ) + revision2 = Revision( + id=hash_to_bytes("df7a6f6a99671fb7f7343641aff983a314ef6161"), + message=b"hello again", + author=Person( + name=b"Roberto Dicosmo", + email=b"roberto@example.com", + fullname=b"Roberto Dicosmo ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567843, microseconds=220000,), + offset=-720, + negative_utc=False, + ), + committer=Person( + name=b"tony", email=b"ar@dumont.fr", fullname=b"tony ", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1123456789, microseconds=220000,), + offset=0, + negative_utc=False, + ), + parents=tuple([revision.id]), + type=RevisionType.GIT, + directory=directory2.id, + metadata=None, + extra_headers=(), + synthetic=False, + ) + revision3 = Revision( + id=hash_to_bytes("2cbd7bb22c653bbb23a29657852a50a01b591d46"), + message=b"a simple revision with no parents this time", + author=Person( + name=b"Roberto Dicosmo", + 
email=b"roberto@example.com", + fullname=b"Roberto Dicosmo ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567843, microseconds=220000,), + offset=-720, + negative_utc=False, + ), + committer=Person( + name=b"tony", email=b"ar@dumont.fr", fullname=b"tony ", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1127351742, microseconds=220000,), + offset=0, + negative_utc=False, + ), + parents=tuple([revision.id, revision2.id]), + type=RevisionType.GIT, + directory=directory2.id, + metadata=None, + extra_headers=(), + synthetic=True, + ) + revision4 = Revision( + id=hash_to_bytes("88cd5126fc958ed70089d5340441a1c2477bcc20"), + message=b"parent of self.revision2", + author=Person( + name=b"me", email=b"me@soft.heri", fullname=b"me ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567843, microseconds=220000,), + offset=-720, + negative_utc=False, + ), + committer=Person( + name=b"committer-dude", + email=b"committer@dude.com", + fullname=b"committer-dude ", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1244567843, microseconds=220000,), + offset=-720, + negative_utc=False, + ), + parents=tuple([revision3.id]), + type=RevisionType.GIT, + directory=directory.id, + metadata=None, + extra_headers=(), + synthetic=False, + ) + revisions: Tuple[Revision, ...] = (revision, revision2, revision3, revision4) + + origins: Tuple[Origin, ...] = ( + Origin(url="https://github.com/user1/repo1"), + Origin(url="https://github.com/user2/repo1"), + Origin(url="https://github.com/user3/repo1"), + Origin(url="https://gitlab.com/user1/repo1"), + Origin(url="https://gitlab.com/user2/repo1"), + Origin(url="https://forge.softwareheritage.org/source/repo1"), + ) + origin, origin2 = origins[:2] + + metadata_authority = MetadataAuthority( + type=MetadataAuthorityType.DEPOSIT_CLIENT, + url="http://hal.inria.example.com/", + metadata={"location": "France"}, + ) + metadata_authority2 = MetadataAuthority( + type=MetadataAuthorityType.REGISTRY, + url="http://wikidata.example.com/", + metadata={}, + ) + authorities: Tuple[MetadataAuthority, ...] = ( + metadata_authority, + metadata_authority2, + ) + + metadata_fetcher = MetadataFetcher( + name="swh-deposit", version="0.0.1", metadata={"sword_version": "2"}, + ) + metadata_fetcher2 = MetadataFetcher( + name="swh-example", version="0.0.1", metadata={}, + ) + fetchers: Tuple[MetadataFetcher, ...] = (metadata_fetcher, metadata_fetcher2) + + date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) + date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) + date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) + + type_visit1 = "git" + type_visit2 = "hg" + type_visit3 = "deb" + + origin_visit = OriginVisit( + origin=origin.url, visit=1, date=date_visit1, type=type_visit1, + ) + origin_visit2 = OriginVisit( + origin=origin.url, visit=2, date=date_visit2, type=type_visit1, + ) + origin_visit3 = OriginVisit( + origin=origin2.url, visit=1, date=date_visit1, type=type_visit2, + ) + origin_visits: Tuple[OriginVisit, ...] 
= ( + origin_visit, + origin_visit2, + origin_visit3, + ) + + release = Release( + id=hash_to_bytes("a673e617fcc6234e29b2cad06b8245f96c415c61"), + name=b"v0.0.1", + author=Person( + name=b"olasd", email=b"nic@olasd.fr", fullname=b"olasd ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567890, microseconds=0), + offset=42, + negative_utc=False, + ), + target=revision.id, + target_type=ObjectType.REVISION, + message=b"synthetic release", + synthetic=True, + ) + release2 = Release( + id=hash_to_bytes("6902bd4c82b7d19a421d224aedab2b74197e420d"), + name=b"v0.0.2", + author=Person( + name=b"tony", email=b"ar@dumont.fr", fullname=b"tony ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1634366813, microseconds=0), + offset=-120, + negative_utc=False, + ), + target=revision2.id, + target_type=ObjectType.REVISION, + message=b"v0.0.2\nMisc performance improvements + bug fixes", + synthetic=False, + ) + release3 = Release( + id=hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"), + name=b"v0.0.2", + author=Person( + name=b"tony", + email=b"tony@ardumont.fr", + fullname=b"tony ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1634366813, microseconds=0), + offset=-120, + negative_utc=False, + ), + target=revision3.id, + target_type=ObjectType.REVISION, + message=b"yet another synthetic release", + synthetic=True, + ) + + releases: Tuple[Release, ...] = (release, release2, release3) + + snapshot = Snapshot( + id=hash_to_bytes("409ee1ff3f10d166714bc90581debfd0446dda57"), + branches={ + b"master": SnapshotBranch( + target=revision.id, target_type=TargetType.REVISION, ), - DirectoryEntry( - name=b"bar\xc3", - type="dir", - target=directory5.id, - perms=from_disk.DentryPerms.directory, + }, + ) + empty_snapshot = Snapshot( + id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"), branches={}, + ) + complete_snapshot = Snapshot( + id=hash_to_bytes("a56ce2d81c190023bb99a3a36279307522cb85f6"), + branches={ + b"directory": SnapshotBranch( + target=directory.id, target_type=TargetType.DIRECTORY, ), - ], - ), -) - -directory2 = Directory( - id=hash_to_bytes("8505808532953da7d2581741f01b29c04b1cb9ab"), - entries=tuple( - [ - DirectoryEntry( - name=b"oof", - type="file", - target=content2.sha1_git, - perms=from_disk.DentryPerms.content, - ) - ], - ), -) - -directory3 = Directory( - id=hash_to_bytes("4ea8c6b2f54445e5dd1a9d5bb2afd875d66f3150"), - entries=tuple( - [ - DirectoryEntry( - name=b"foo", - type="file", - target=content.sha1_git, - perms=from_disk.DentryPerms.content, + b"directory2": SnapshotBranch( + target=directory2.id, target_type=TargetType.DIRECTORY, ), - DirectoryEntry( - name=b"subdir", - type="dir", - target=directory.id, - perms=from_disk.DentryPerms.directory, + b"content": SnapshotBranch( + target=content.sha1_git, target_type=TargetType.CONTENT, ), - DirectoryEntry( - name=b"hello", - type="file", - target=directory5.id, - perms=from_disk.DentryPerms.content, + b"alias": SnapshotBranch(target=b"revision", target_type=TargetType.ALIAS,), + b"revision": SnapshotBranch( + target=revision.id, target_type=TargetType.REVISION, ), - ], - ), -) - -directory4 = Directory( - id=hash_to_bytes("377aa5fcd944fbabf502dbfda55cd14d33c8c3c6"), - entries=tuple( - [ - DirectoryEntry( - name=b"subdir1", - type="dir", - target=directory3.id, - perms=from_disk.DentryPerms.directory, - ) - ], - ), -) - -directories = (directory2, directory, directory3, directory4, directory5) - -minus_offset = datetime.timezone(datetime.timedelta(minutes=-120)) 
-plus_offset = datetime.timezone(datetime.timedelta(minutes=120))
-
-revision = Revision(
-    id=hash_to_bytes("066b1b62dbfa033362092af468bf6cfabec230e7"),
-    message=b"hello",
-    author=Person(
-        name=b"Nicolas Dandrimont",
-        email=b"nicolas@example.com",
-        fullname=b"Nicolas Dandrimont <nicolas@example.com> ",
-    ),
-    date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1234567890, microseconds=0),
-        offset=120,
-        negative_utc=False,
-    ),
-    committer=Person(
-        name=b"St\xc3fano Zacchiroli",
-        email=b"stefano@example.com",
-        fullname=b"St\xc3fano Zacchiroli <stefano@example.com> ",
-    ),
-    committer_date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1123456789, microseconds=0),
-        offset=120,
-        negative_utc=False,
-    ),
-    parents=(),
-    type=RevisionType.GIT,
-    directory=directory.id,
-    metadata={
-        "checksums": {"sha1": "tarball-sha1", "sha256": "tarball-sha256",},
-        "signed-off-by": "some-dude",
-    },
-    extra_headers=(
-        (b"gpgsig", b"test123"),
-        (b"mergetag", b"foo\\bar"),
-        (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"),
-    ),
-    synthetic=True,
-)
-
-revision2 = Revision(
-    id=hash_to_bytes("df7a6f6a99671fb7f7343641aff983a314ef6161"),
-    message=b"hello again",
-    author=Person(
-        name=b"Roberto Dicosmo",
-        email=b"roberto@example.com",
-        fullname=b"Roberto Dicosmo <roberto@example.com>",
-    ),
-    date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1234567843, microseconds=220000,),
-        offset=-720,
-        negative_utc=False,
-    ),
-    committer=Person(
-        name=b"tony", email=b"ar@dumont.fr", fullname=b"tony <ar@dumont.fr>",
-    ),
-    committer_date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1123456789, microseconds=220000,),
-        offset=0,
-        negative_utc=False,
-    ),
-    parents=tuple([revision.id]),
-    type=RevisionType.GIT,
-    directory=directory2.id,
-    metadata=None,
-    extra_headers=(),
-    synthetic=False,
-)
-
-revision3 = Revision(
-    id=hash_to_bytes("2cbd7bb22c653bbb23a29657852a50a01b591d46"),
-    message=b"a simple revision with no parents this time",
-    author=Person(
-        name=b"Roberto Dicosmo",
-        email=b"roberto@example.com",
-        fullname=b"Roberto Dicosmo <roberto@example.com>",
-    ),
-    date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1234567843, microseconds=220000,),
-        offset=-720,
-        negative_utc=False,
-    ),
-    committer=Person(
-        name=b"tony", email=b"ar@dumont.fr", fullname=b"tony <ar@dumont.fr>",
-    ),
-    committer_date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1127351742, microseconds=220000,),
-        offset=0,
-        negative_utc=False,
-    ),
-    parents=tuple([revision.id, revision2.id]),
-    type=RevisionType.GIT,
-    directory=directory2.id,
-    metadata=None,
-    extra_headers=(),
-    synthetic=True,
-)
-
-revision4 = Revision(
-    id=hash_to_bytes("88cd5126fc958ed70089d5340441a1c2477bcc20"),
-    message=b"parent of self.revision2",
-    author=Person(name=b"me", email=b"me@soft.heri", fullname=b"me <me@soft.heri>",),
-    date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1234567843, microseconds=220000,),
-        offset=-720,
-        negative_utc=False,
-    ),
-    committer=Person(
-        name=b"committer-dude",
-        email=b"committer@dude.com",
-        fullname=b"committer-dude <committer@dude.com>",
-    ),
-    committer_date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1244567843, microseconds=220000,),
-        offset=-720,
-        negative_utc=False,
-    ),
-    parents=tuple([revision3.id]),
-    type=RevisionType.GIT,
-    directory=directory.id,
-    metadata=None,
-    extra_headers=(),
-    synthetic=False,
-)
-
-revisions = (revision, revision2, revision3, revision4)
-
-origins = (
-    Origin(url="https://github.com/user1/repo1"),
-    Origin(url="https://github.com/user2/repo1"),
-    Origin(url="https://github.com/user3/repo1"),
-    Origin(url="https://gitlab.com/user1/repo1"),
-    Origin(url="https://gitlab.com/user2/repo1"),
-    Origin(url="https://forge.softwareheritage.org/source/repo1"),
-)
-
-origin, origin2 = origins[:2]
-
-metadata_authority = MetadataAuthority(
-    type=MetadataAuthorityType.DEPOSIT_CLIENT,
-    url="http://hal.inria.example.com/",
-    metadata={"location": "France"},
-)
-metadata_authority2 = MetadataAuthority(
-    type=MetadataAuthorityType.REGISTRY,
-    url="http://wikidata.example.com/",
-    metadata={},
-)
-
-authorities = (metadata_authority, metadata_authority2)
-
-metadata_fetcher = MetadataFetcher(
-    name="swh-deposit", version="0.0.1", metadata={"sword_version": "2"},
-)
-metadata_fetcher2 = MetadataFetcher(name="swh-example", version="0.0.1", metadata={},)
-
-fetchers = (metadata_fetcher, metadata_fetcher2)
-
-date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
-type_visit1 = "git"
-
-date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
-type_visit2 = "hg"
-
-date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
-type_visit3 = "deb"
-
-origin_visit = OriginVisit(
-    origin=origin.url, visit=1, date=date_visit1, type=type_visit1,
-)
-
-origin_visit2 = OriginVisit(
-    origin=origin.url, visit=2, date=date_visit2, type=type_visit1,
-)
-
-origin_visit3 = OriginVisit(
-    origin=origin2.url, visit=1, date=date_visit1, type=type_visit2,
-)
-
-origin_visits = (origin_visit, origin_visit2, origin_visit3)
-
-release = Release(
-    id=hash_to_bytes("a673e617fcc6234e29b2cad06b8245f96c415c61"),
-    name=b"v0.0.1",
-    author=Person(
-        name=b"olasd", email=b"nic@olasd.fr", fullname=b"olasd <nic@olasd.fr>",
-    ),
-    date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1234567890, microseconds=0),
-        offset=42,
-        negative_utc=False,
-    ),
-    target=revision.id,
-    target_type=ObjectType.REVISION,
-    message=b"synthetic release",
-    synthetic=True,
-)
-
-release2 = Release(
-    id=hash_to_bytes("6902bd4c82b7d19a421d224aedab2b74197e420d"),
-    name=b"v0.0.2",
-    author=Person(
-        name=b"tony", email=b"ar@dumont.fr", fullname=b"tony <ar@dumont.fr>",
-    ),
-    date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1634366813, microseconds=0),
-        offset=-120,
-        negative_utc=False,
-    ),
-    target=revision2.id,
-    target_type=ObjectType.REVISION,
-    message=b"v0.0.2\nMisc performance improvements + bug fixes",
-    synthetic=False,
-)
-
-release3 = Release(
-    id=hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"),
-    name=b"v0.0.2",
-    author=Person(
-        name=b"tony", email=b"tony@ardumont.fr", fullname=b"tony <tony@ardumont.fr>",
-    ),
-    date=TimestampWithTimezone(
-        timestamp=Timestamp(seconds=1634366813, microseconds=0),
-        offset=-120,
-        negative_utc=False,
-    ),
-    target=revision3.id,
-    target_type=ObjectType.REVISION,
-    message=b"yet another synthetic release",
-    synthetic=True,
-)
-
-releases = (release, release2, release3)
-
-snapshot = Snapshot(
-    id=hash_to_bytes("409ee1ff3f10d166714bc90581debfd0446dda57"),
-    branches={
-        b"master": SnapshotBranch(target=revision.id, target_type=TargetType.REVISION,),
-    },
-)
-
-empty_snapshot = Snapshot(
-    id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"), branches={},
-)
-
-complete_snapshot = Snapshot(
-    id=hash_to_bytes("a56ce2d81c190023bb99a3a36279307522cb85f6"),
-    branches={
-        b"directory": SnapshotBranch(
-            target=directory.id, target_type=TargetType.DIRECTORY,
+            b"release": SnapshotBranch(
+                target=release.id, target_type=TargetType.RELEASE,
+            ),
+            b"snapshot": SnapshotBranch(
+                target=empty_snapshot.id, target_type=TargetType.SNAPSHOT,
+            ),
+            b"dangling": None,
+        },
+    )
+
+    snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot)
+
+    content_metadata1 = RawExtrinsicMetadata(
+        type=MetadataTargetType.CONTENT,
+        id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
+        origin=origin.url,
+        discovery_date=datetime.datetime(
+            2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
         ),
-        b"directory2": SnapshotBranch(
-            target=directory2.id, target_type=TargetType.DIRECTORY,
+        authority=attr.evolve(metadata_authority, metadata=None),
+        fetcher=attr.evolve(metadata_fetcher, metadata=None),
+        format="json",
+        metadata=b'{"foo": "bar"}',
+    )
+    content_metadata2 = RawExtrinsicMetadata(
+        type=MetadataTargetType.CONTENT,
+        id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
+        origin=origin2.url,
+        discovery_date=datetime.datetime(
+            2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
         ),
-        b"content": SnapshotBranch(
-            target=content.sha1_git, target_type=TargetType.CONTENT,
+        authority=attr.evolve(metadata_authority, metadata=None),
+        fetcher=attr.evolve(metadata_fetcher, metadata=None),
+        format="yaml",
+        metadata=b"foo: bar",
+    )
+    content_metadata3 = RawExtrinsicMetadata(
+        type=MetadataTargetType.CONTENT,
+        id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
+        discovery_date=datetime.datetime(
+            2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
         ),
-        b"alias": SnapshotBranch(target=b"revision", target_type=TargetType.ALIAS,),
-        b"revision": SnapshotBranch(
-            target=revision.id, target_type=TargetType.REVISION,
+        authority=attr.evolve(metadata_authority2, metadata=None),
+        fetcher=attr.evolve(metadata_fetcher2, metadata=None),
+        format="yaml",
+        metadata=b"foo: bar",
+        origin=origin.url,
+        visit=42,
+        snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"),
+        release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"),
+        revision=parse_swhid(f"swh:1:rev:{hash_to_hex(revision.id)}"),
+        directory=parse_swhid(f"swh:1:dir:{hash_to_hex(directory.id)}"),
+        path=b"/foo/bar",
+    )
+
+    content_metadata: Tuple[RawExtrinsicMetadata, ...] = (
+        content_metadata1,
+        content_metadata2,
+        content_metadata3,
+    )
+
+    origin_metadata1 = RawExtrinsicMetadata(
+        type=MetadataTargetType.ORIGIN,
+        id=origin.url,
+        discovery_date=datetime.datetime(
+            2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
         ),
-        b"release": SnapshotBranch(target=release.id, target_type=TargetType.RELEASE,),
-        b"snapshot": SnapshotBranch(
-            target=empty_snapshot.id, target_type=TargetType.SNAPSHOT,
+        authority=attr.evolve(metadata_authority, metadata=None),
+        fetcher=attr.evolve(metadata_fetcher, metadata=None),
+        format="json",
+        metadata=b'{"foo": "bar"}',
+    )
+    origin_metadata2 = RawExtrinsicMetadata(
+        type=MetadataTargetType.ORIGIN,
+        id=origin.url,
+        discovery_date=datetime.datetime(
+            2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
         ),
-        b"dangling": None,
-    },
-)
-
-snapshots = (snapshot, empty_snapshot, complete_snapshot)
-
-content_metadata1 = RawExtrinsicMetadata(
-    type=MetadataTargetType.CONTENT,
-    id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
-    origin=origin.url,
-    discovery_date=datetime.datetime(
-        2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
-    ),
-    authority=attr.evolve(metadata_authority, metadata=None),
-    fetcher=attr.evolve(metadata_fetcher, metadata=None),
-    format="json",
-    metadata=b'{"foo": "bar"}',
-)
-content_metadata2 = RawExtrinsicMetadata(
-    type=MetadataTargetType.CONTENT,
-    id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
-    origin=origin2.url,
-    discovery_date=datetime.datetime(
-        2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
-    ),
-    authority=attr.evolve(metadata_authority, metadata=None),
-    fetcher=attr.evolve(metadata_fetcher, metadata=None),
-    format="yaml",
-    metadata=b"foo: bar",
-)
-content_metadata3 = RawExtrinsicMetadata(
-    type=MetadataTargetType.CONTENT,
-    id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
-    discovery_date=datetime.datetime(
-        2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
-    ),
-    authority=attr.evolve(metadata_authority2, metadata=None),
-    fetcher=attr.evolve(metadata_fetcher2, metadata=None),
-    format="yaml",
-    metadata=b"foo: bar",
-    origin=origin.url,
-    visit=42,
-    snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"),
-    release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"),
-    revision=parse_swhid(f"swh:1:rev:{hash_to_hex(revision.id)}"),
-    directory=parse_swhid(f"swh:1:dir:{hash_to_hex(directory.id)}"),
-    path=b"/foo/bar",
-)
-
-content_metadata = (
-    content_metadata1,
-    content_metadata2,
-    content_metadata3,
-)
-
-origin_metadata1 = RawExtrinsicMetadata(
-    type=MetadataTargetType.ORIGIN,
-    id=origin.url,
-    discovery_date=datetime.datetime(
-        2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
-    ),
-    authority=attr.evolve(metadata_authority, metadata=None),
-    fetcher=attr.evolve(metadata_fetcher, metadata=None),
-    format="json",
-    metadata=b'{"foo": "bar"}',
-)
-origin_metadata2 = RawExtrinsicMetadata(
-    type=MetadataTargetType.ORIGIN,
-    id=origin.url,
-    discovery_date=datetime.datetime(
-        2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
-    ),
-    authority=attr.evolve(metadata_authority, metadata=None),
-    fetcher=attr.evolve(metadata_fetcher, metadata=None),
-    format="yaml",
-    metadata=b"foo: bar",
-)
-origin_metadata3 = RawExtrinsicMetadata(
-    type=MetadataTargetType.ORIGIN,
-    id=origin.url,
-    discovery_date=datetime.datetime(
-        2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
-    ),
-    authority=attr.evolve(metadata_authority2, metadata=None),
-    fetcher=attr.evolve(metadata_fetcher2, metadata=None),
-    format="yaml",
-    metadata=b"foo: bar",
-)
-
-origin_metadata = (
-    origin_metadata1,
-    origin_metadata2,
-    origin_metadata3,
-)
+        authority=attr.evolve(metadata_authority, metadata=None),
+        fetcher=attr.evolve(metadata_fetcher, metadata=None),
+        format="yaml",
+        metadata=b"foo: bar",
+    )
+    origin_metadata3 = RawExtrinsicMetadata(
+        type=MetadataTargetType.ORIGIN,
+        id=origin.url,
+        discovery_date=datetime.datetime(
+            2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
+        ),
+        authority=attr.evolve(metadata_authority2, metadata=None),
+        fetcher=attr.evolve(metadata_fetcher2, metadata=None),
+        format="yaml",
+        metadata=b"foo: bar",
+    )
+
+    origin_metadata: Tuple[RawExtrinsicMetadata, ...] = (
+        origin_metadata1,
+        origin_metadata2,
+        origin_metadata3,
+    )
diff --git a/swh/storage/tests/test_buffer.py b/swh/storage/tests/test_buffer.py
--- a/swh/storage/tests/test_buffer.py
+++ b/swh/storage/tests/test_buffer.py
@@ -16,7 +16,7 @@

 def test_buffering_proxy_storage_content_threshold_not_hit(sample_data):
-    contents = sample_data["content"][:2]
+    contents = sample_data.contents[:2]
     contents_dict = [c.to_dict() for c in contents]
     storage = get_storage_with_buffer_config(min_batch_size={"content": 10,})
@@ -38,7 +38,7 @@

 def test_buffering_proxy_storage_content_threshold_nb_hit(sample_data):
-    content = sample_data["content"][0]
+    content = sample_data.content
     content_dict = content.to_dict()

     storage = get_storage_with_buffer_config(min_batch_size={"content": 1,})
@@ -57,7 +57,7 @@

 def test_buffering_proxy_storage_content_deduplicate(sample_data):
-    contents = sample_data["content"][:2]
+    contents = sample_data.contents[:2]
     storage = get_storage_with_buffer_config(min_batch_size={"content": 2,})

     s = storage.content_add([contents[0], contents[0]])
@@ -80,7 +80,7 @@

 def test_buffering_proxy_storage_content_threshold_bytes_hit(sample_data):
-    contents = sample_data["content"][:2]
+    contents = sample_data.contents[:2]
     content_bytes_min_batch_size = 2
     storage = get_storage_with_buffer_config(
         min_batch_size={"content": 10, "content_bytes": content_bytes_min_batch_size,}
     )
@@ -102,7 +102,7 @@

 def test_buffering_proxy_storage_skipped_content_threshold_not_hit(sample_data):
-    contents = sample_data["skipped_content"]
+    contents = sample_data.skipped_contents
     contents_dict = [c.to_dict() for c in contents]
     storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 10,})
     s = storage.skipped_content_add([contents[0], contents[1]])
@@ -120,7 +120,7 @@

 def test_buffering_proxy_storage_skipped_content_threshold_nb_hit(sample_data):
-    contents = sample_data["skipped_content"]
+    contents = sample_data.skipped_contents
     storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 1,})

     s = storage.skipped_content_add([contents[0]])
@@ -134,7 +134,7 @@

 def test_buffering_proxy_storage_skipped_content_deduplicate(sample_data):
-    contents = sample_data["skipped_content"][:2]
+    contents = sample_data.skipped_contents[:2]
     storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 2,})

     s = storage.skipped_content_add([contents[0], contents[0]])
@@ -156,33 +156,32 @@

 def test_buffering_proxy_storage_directory_threshold_not_hit(sample_data):
-    directories = sample_data["directory"]
+    directory = sample_data.directory
     storage = get_storage_with_buffer_config(min_batch_size={"directory": 10,})
-    s = storage.directory_add([directories[0]])
+    s = storage.directory_add([directory])
     assert s == {}

-    directory_id = directories[0].id
-    missing_directories = storage.directory_missing([directory_id])
-    assert list(missing_directories) == [directory_id]
+    missing_directories = storage.directory_missing([directory.id])
+    assert list(missing_directories) == [directory.id]

     s = storage.flush()
     assert s == {
         "directory:add": 1,
     }

-    missing_directories = storage.directory_missing([directory_id])
+    missing_directories = storage.directory_missing([directory.id])
     assert list(missing_directories) == []


 def test_buffering_proxy_storage_directory_threshold_hit(sample_data):
-    directories = sample_data["directory"]
+    directory = sample_data.directory
     storage = get_storage_with_buffer_config(min_batch_size={"directory": 1,})
-    s = storage.directory_add([directories[0]])
+    s = storage.directory_add([directory])
     assert s == {
         "directory:add": 1,
     }

-    missing_directories = storage.directory_missing([directories[0].id])
+    missing_directories = storage.directory_missing([directory.id])
     assert list(missing_directories) == []

     s = storage.flush()
@@ -190,7 +189,7 @@

 def test_buffering_proxy_storage_directory_deduplicate(sample_data):
-    directories = sample_data["directory"][:2]
+    directories = sample_data.directories[:2]
     storage = get_storage_with_buffer_config(min_batch_size={"directory": 2,})

     s = storage.directory_add([directories[0], directories[0]])
@@ -212,33 +211,32 @@

 def test_buffering_proxy_storage_revision_threshold_not_hit(sample_data):
-    revisions = sample_data["revision"]
+    revision = sample_data.revision
     storage = get_storage_with_buffer_config(min_batch_size={"revision": 10,})
-    s = storage.revision_add([revisions[0]])
+    s = storage.revision_add([revision])
     assert s == {}

-    revision_id = revisions[0].id
-    missing_revisions = storage.revision_missing([revision_id])
-    assert list(missing_revisions) == [revision_id]
+    missing_revisions = storage.revision_missing([revision.id])
+    assert list(missing_revisions) == [revision.id]

     s = storage.flush()
     assert s == {
         "revision:add": 1,
     }

-    missing_revisions = storage.revision_missing([revision_id])
+    missing_revisions = storage.revision_missing([revision.id])
     assert list(missing_revisions) == []


 def test_buffering_proxy_storage_revision_threshold_hit(sample_data):
-    revisions = sample_data["revision"]
+    revision = sample_data.revision
     storage = get_storage_with_buffer_config(min_batch_size={"revision": 1,})
-    s = storage.revision_add([revisions[0]])
+    s = storage.revision_add([revision])
     assert s == {
         "revision:add": 1,
     }

-    missing_revisions = storage.revision_missing([revisions[0].id])
+    missing_revisions = storage.revision_missing([revision.id])
     assert list(missing_revisions) == []

     s = storage.flush()
@@ -246,7 +244,7 @@

 def test_buffering_proxy_storage_revision_deduplicate(sample_data):
-    revisions = sample_data["revision"][:2]
+    revisions = sample_data.revisions[:2]
     storage = get_storage_with_buffer_config(min_batch_size={"revision": 2,})

     s = storage.revision_add([revisions[0], revisions[0]])
@@ -268,7 +266,7 @@

 def test_buffering_proxy_storage_release_threshold_not_hit(sample_data):
-    releases = sample_data["release"]
+    releases = sample_data.releases
     threshold = 10

     assert len(releases) < threshold
@@ -292,7 +290,7 @@

 def test_buffering_proxy_storage_release_threshold_hit(sample_data):
-    releases = sample_data["release"]
+    releases = sample_data.releases
     threshold = 2

     assert len(releases) > threshold
@@ -314,7 +312,7 @@

 def test_buffering_proxy_storage_release_deduplicate(sample_data):
-    releases = sample_data["release"][:2]
+    releases = sample_data.releases[:2]
     storage = get_storage_with_buffer_config(min_batch_size={"release": 2,})

     s = storage.release_add([releases[0], releases[0]])
@@ -340,15 +338,15 @@
     """
     threshold = 10

-    contents = sample_data["content"]
+    contents = sample_data.contents
     assert 0 < len(contents) < threshold
-    skipped_contents = sample_data["skipped_content"]
+    skipped_contents = sample_data.skipped_contents
     assert 0 < len(skipped_contents) < threshold
-    directories = sample_data["directory"]
+    directories = sample_data.directories
     assert 0 < len(directories) < threshold
-    revisions = sample_data["revision"]
+    revisions = sample_data.revisions
     assert 0 < len(revisions) < threshold
-    releases = sample_data["release"]
+    releases = sample_data.releases
     assert 0 < len(releases) < threshold

     storage = get_storage_with_buffer_config(
diff --git a/swh/storage/tests/test_cassandra.py b/swh/storage/tests/test_cassandra.py
--- a/swh/storage/tests/test_cassandra.py
+++ b/swh/storage/tests/test_cassandra.py
@@ -193,7 +193,7 @@
     """
     called = 0

-    cont, cont2 = sample_data["content"][:2]
+    cont, cont2 = sample_data.contents[:2]

     # always return a token
     def mock_cgtfsh(algo, hash_):
@@ -236,7 +236,7 @@
     """
     called = 0

-    cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data["content"][:2]]
+    cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data.contents[:2]]

     # always return a token
     def mock_cgtfsh(algo, hash_):
@@ -284,7 +284,7 @@
     """
     called = 0

-    cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data["content"][:2]]
+    cont, cont2 = [attr.evolve(c, ctime=now()) for c in sample_data.contents[:2]]

     # always return a token
     def mock_cgtfsh(algo, hash_):
diff --git a/swh/storage/tests/test_filter.py b/swh/storage/tests/test_filter.py
--- a/swh/storage/tests/test_filter.py
+++ b/swh/storage/tests/test_filter.py
@@ -21,7 +21,7 @@

 def test_filtering_proxy_storage_content(swh_storage, sample_data):
-    sample_content = sample_data["content"][0]
+    sample_content = sample_data.content

     content = next(swh_storage.content_get([sample_content.sha1]))
     assert not content
@@ -43,7 +43,7 @@

 def test_filtering_proxy_storage_skipped_content(swh_storage, sample_data):
-    sample_content = sample_data["skipped_content"][0]
+    sample_content = sample_data.skipped_content
     sample_content_dict = sample_content.to_dict()

     content = next(swh_storage.skipped_content_missing([sample_content_dict]))
@@ -67,7 +67,7 @@
     swh_storage, sample_data
 ):
     sample_contents = [
-        attr.evolve(c, sha1_git=None) for c in sample_data["skipped_content"]
+        attr.evolve(c, sha1_git=None) for c in sample_data.skipped_contents
     ]
     sample_content, sample_content2 = [c.to_dict() for c in sample_contents[:2]]
@@ -92,7 +92,7 @@

 def test_filtering_proxy_storage_revision(swh_storage, sample_data):
-    sample_revision = sample_data["revision"][0]
+    sample_revision = sample_data.revision

     revision = next(swh_storage.revision_get([sample_revision.id]))
     assert not revision
@@ -112,7 +112,7 @@

 def test_filtering_proxy_storage_directory(swh_storage, sample_data):
-    sample_directory = sample_data["directory"][0]
+    sample_directory = sample_data.directory

     directory = next(swh_storage.directory_missing([sample_directory.id]))
     assert directory
diff --git a/swh/storage/tests/test_pytest_plugin.py b/swh/storage/tests/test_pytest_plugin.py
--- a/swh/storage/tests/test_pytest_plugin.py
+++ b/swh/storage/tests/test_pytest_plugin.py
@@ -3,31 +3,12 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

-
-from swh.model.model import BaseModel
 from swh.storage.interface import StorageInterface
+from swh.storage.tests.storage_data import StorageData


 def test_sample_data(sample_data):
-    assert set(sample_data.keys()) == set(
-        [
-            "content",
-            "skipped_content",
-            "directory",
-            "revision",
-            "release",
-            "snapshot",
-            "origin",
-            "origin_visit",
-            "fetcher",
-            "authority",
-            "origin_metadata",
-            "content_metadata",
-        ]
-    )
-    for object_type, objs in sample_data.items():
-        for obj in objs:
-            assert isinstance(obj, BaseModel)
+    assert isinstance(sample_data, StorageData)


 def test_swh_storage(swh_storage: StorageInterface):
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -10,15 +10,10 @@
 import psycopg2
 import pytest

-from swh.model.model import (
-    OriginVisit,
-    MetadataTargetType,
-)
+from swh.model.model import MetadataTargetType

 from swh.storage.exc import HashCollision, StorageArgumentException

-from .storage_data import date_visit1
-

 @pytest.fixture
 def monkeypatch_sleep(monkeypatch, swh_storage):
@@ -51,8 +46,7 @@
     """Standard content_add works as before

     """
-    sample_content = sample_data["content"][0]
-
+    sample_content = sample_data.content
     content = next(swh_storage.content_get([sample_content.sha1]))
     assert not content
@@ -82,7 +76,7 @@
         {"content:add": 1},
     ]

-    sample_content = sample_data["content"][0]
+    sample_content = sample_data.content
     content = next(swh_storage.content_get([sample_content.sha1]))
     assert not content
@@ -104,7 +98,7 @@
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.content_add")
     mock_memory.side_effect = StorageArgumentException("Refuse to add content always!")

-    sample_content = sample_data["content"][0]
+    sample_content = sample_data.content
     content = next(swh_storage.content_get([sample_content.sha1]))
     assert not content
@@ -119,7 +113,7 @@
     """Standard content_add_metadata works as before

     """
-    sample_content = sample_data["content"][0]
+    sample_content = sample_data.content
     content = attr.evolve(sample_content, data=None)

     pk = content.sha1
@@ -154,7 +148,7 @@
         {"content:add": 1},
     ]

-    sample_content = sample_data["content"][0]
+    sample_content = sample_data.content
     content = attr.evolve(sample_content, data=None)

     s = swh_storage.content_add_metadata([content])
@@ -178,7 +172,7 @@
         "Refuse to add content_metadata!"
     )

-    sample_content = sample_data["content"][0]
+    sample_content = sample_data.content
     content = attr.evolve(sample_content, data=None)

     pk = content.sha1
@@ -195,7 +189,7 @@
     """Standard skipped_content_add works as before

     """
-    sample_content = sample_data["skipped_content"][0]
+    sample_content = sample_data.skipped_content
     sample_content_dict = sample_content.to_dict()

     skipped_contents = list(swh_storage.skipped_content_missing([sample_content_dict]))
@@ -227,7 +221,7 @@
         {"skipped_content:add": 1},
     ]

-    sample_content = sample_data["skipped_content"][0]
+    sample_content = sample_data.skipped_content

     s = swh_storage.skipped_content_add([sample_content])
     assert s == {"skipped_content:add": 1}
@@ -250,7 +244,7 @@
         "Refuse to add content_metadata!"
     )

-    sample_content = sample_data["skipped_content"][0]
+    sample_content = sample_data.skipped_content
     sample_content_dict = sample_content.to_dict()

     skipped_contents = list(swh_storage.skipped_content_missing([sample_content_dict]))
@@ -269,14 +263,15 @@
     """Standard origin_visit_add works as before

     """
-    origin = sample_data["origin"][0]
+    origin = sample_data.origin
+    visit = sample_data.origin_visit
+    assert visit.origin == origin.url

     swh_storage.origin_add([origin])

     origins = list(swh_storage.origin_visit_get(origin.url))
     assert not origins

-    visit = OriginVisit(origin=origin.url, date=date_visit1, type="hg")
     origin_visit = swh_storage.origin_visit_add([visit])[0]
     assert origin_visit.origin == origin.url
     assert isinstance(origin_visit.visit, int)
@@ -292,11 +287,13 @@
     """Multiple retries for hash collision and psycopg2 error but finally ok

     """
-    origin = sample_data["origin"][1]
+    origin = sample_data.origin
+    visit = sample_data.origin_visit
+    assert visit.origin == origin.url
+
     swh_storage.origin_add([origin])
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.origin_visit_add")

-    visit = OriginVisit(origin=origin.url, date=date_visit1, type="git")
     mock_memory.side_effect = [
         # first try goes ko
         fake_hash_collision,
@@ -326,13 +323,14 @@
     mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.origin_visit_add")
     mock_memory.side_effect = StorageArgumentException("Refuse to add origin always!")

-    origin = sample_data["origin"][0]
+    origin = sample_data.origin
+    visit = sample_data.origin_visit
+    assert visit.origin == origin.url

     origins = list(swh_storage.origin_visit_get(origin.url))
     assert not origins

     with pytest.raises(StorageArgumentException, match="Refuse to add"):
-        visit = OriginVisit(origin=origin.url, date=date_visit1, type="svn",)
         swh_storage.origin_visit_add([visit])

     mock_memory.assert_has_calls(
@@ -344,7 +342,7 @@
     """Standard metadata_fetcher_add works as before

     """
-    fetcher = sample_data["fetcher"][0]
+    fetcher = sample_data.metadata_fetcher
     metadata_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
     assert not metadata_fetcher
@@ -361,7 +359,7 @@
     """Multiple retries for hash collision and psycopg2 error but finally ok

     """
-    fetcher = sample_data["fetcher"][0]
+    fetcher = sample_data.metadata_fetcher
     mock_memory = mocker.patch(
         "swh.storage.in_memory.InMemoryStorage.metadata_fetcher_add"
     )
@@ -397,7 +395,7 @@
         "Refuse to add metadata_fetcher always!"
     )

-    fetcher = sample_data["fetcher"][0]
+    fetcher = sample_data.metadata_fetcher

     actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
     assert not actual_fetcher
@@ -412,7 +410,7 @@
     """Standard metadata_authority_add works as before

     """
-    authority = sample_data["authority"][0]
+    authority = sample_data.metadata_authority

     assert not swh_storage.metadata_authority_get(authority.type, authority.url)
@@ -428,7 +426,7 @@
     """Multiple retries for hash collision and psycopg2 error but finally ok

     """
-    authority = sample_data["authority"][0]
+    authority = sample_data.metadata_authority

     mock_memory = mocker.patch(
         "swh.storage.in_memory.InMemoryStorage.metadata_authority_add"
@@ -464,7 +462,7 @@
         "Refuse to add authority_id always!"
) - authority = sample_data["authority"][0] + authority = sample_data.metadata_authority swh_storage.metadata_authority_get(authority.type, authority.url) @@ -478,12 +476,12 @@ """Standard object_metadata_add works as before """ - origin = sample_data["origin"][0] - ori_meta = sample_data["origin_metadata"][0] + origin = sample_data.origin + ori_meta = sample_data.origin_metadata1 assert origin.url == ori_meta.id swh_storage.origin_add([origin]) - swh_storage.metadata_authority_add([sample_data["authority"][0]]) - swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]]) + swh_storage.metadata_authority_add([sample_data.metadata_authority]) + swh_storage.metadata_fetcher_add([sample_data.metadata_fetcher]) origin_metadata = swh_storage.object_metadata_get( MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority @@ -505,12 +503,12 @@ """Multiple retries for hash collision and psycopg2 error but finally ok """ - origin = sample_data["origin"][0] - ori_meta = sample_data["origin_metadata"][0] + origin = sample_data.origin + ori_meta = sample_data.origin_metadata1 assert origin.url == ori_meta.id swh_storage.origin_add([origin]) - swh_storage.metadata_authority_add([sample_data["authority"][0]]) - swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]]) + swh_storage.metadata_authority_add([sample_data.metadata_authority]) + swh_storage.metadata_fetcher_add([sample_data.metadata_fetcher]) mock_memory = mocker.patch( "swh.storage.in_memory.InMemoryStorage.object_metadata_add" ) @@ -547,8 +545,8 @@ ) mock_memory.side_effect = StorageArgumentException("Refuse to add always!") - origin = sample_data["origin"][0] - ori_meta = sample_data["origin_metadata"][0] + origin = sample_data.origin + ori_meta = sample_data.origin_metadata1 assert origin.url == ori_meta.id swh_storage.origin_add([origin]) @@ -562,7 +560,7 @@ """Standard directory_add works as before """ - sample_dir = sample_data["directory"][0] + sample_dir = sample_data.directory directory = swh_storage.directory_get_random() # no directory assert not directory @@ -592,7 +590,7 @@ {"directory:add": 1}, ] - sample_dir = sample_data["directory"][1] + sample_dir = sample_data.directories[1] directory_id = swh_storage.directory_get_random() # no directory assert not directory_id @@ -618,7 +616,7 @@ "Refuse to add directory always!" 
) - sample_dir = sample_data["directory"][0] + sample_dir = sample_data.directory directory_id = swh_storage.directory_get_random() # no directory assert not directory_id @@ -633,7 +631,7 @@ """Standard revision_add works as before """ - sample_rev = sample_data["revision"][0] + sample_rev = sample_data.revision revision = next(swh_storage.revision_get([sample_rev.id])) assert not revision @@ -663,7 +661,7 @@ {"revision:add": 1}, ] - sample_rev = sample_data["revision"][0] + sample_rev = sample_data.revision revision = next(swh_storage.revision_get([sample_rev.id])) assert not revision @@ -687,7 +685,7 @@ mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.revision_add") mock_memory.side_effect = StorageArgumentException("Refuse to add revision always!") - sample_rev = sample_data["revision"][0] + sample_rev = sample_data.revision revision = next(swh_storage.revision_get([sample_rev.id])) assert not revision @@ -702,7 +700,7 @@ """Standard release_add works as before """ - sample_rel = sample_data["release"][0] + sample_rel = sample_data.release release = next(swh_storage.release_get([sample_rel.id])) assert not release @@ -732,7 +730,7 @@ {"release:add": 1}, ] - sample_rel = sample_data["release"][0] + sample_rel = sample_data.release release = next(swh_storage.release_get([sample_rel.id])) assert not release @@ -756,7 +754,7 @@ mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.release_add") mock_memory.side_effect = StorageArgumentException("Refuse to add release always!") - sample_rel = sample_data["release"][0] + sample_rel = sample_data.release release = next(swh_storage.release_get([sample_rel.id])) assert not release @@ -771,7 +769,7 @@ """Standard snapshot_add works as before """ - sample_snap = sample_data["snapshot"][0] + sample_snap = sample_data.snapshot snapshot = swh_storage.snapshot_get(sample_snap.id) assert not snapshot @@ -801,7 +799,7 @@ {"snapshot:add": 1}, ] - sample_snap = sample_data["snapshot"][0] + sample_snap = sample_data.snapshot snapshot = swh_storage.snapshot_get(sample_snap.id) assert not snapshot @@ -825,7 +823,7 @@ mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.snapshot_add") mock_memory.side_effect = StorageArgumentException("Refuse to add snapshot always!") - sample_snap = sample_data["snapshot"][0] + sample_snap = sample_data.snapshot snapshot = swh_storage.snapshot_get(sample_snap.id) assert not snapshot diff --git a/swh/storage/tests/test_revision_bw_compat.py b/swh/storage/tests/test_revision_bw_compat.py --- a/swh/storage/tests/test_revision_bw_compat.py +++ b/swh/storage/tests/test_revision_bw_compat.py @@ -17,7 +17,7 @@ def test_revision_extra_header_in_metadata(swh_storage_backend_config, sample_data): storage = get_storage(**swh_storage_backend_config) - rev = sample_data["revision"][0] + rev = sample_data.revision md_w_extra = dict( rev.metadata.items(), diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -44,8 +44,6 @@ from swh.storage.interface import StorageInterface from swh.storage.utils import content_hex_hashes, now -from .storage_data import data - @contextmanager def db_transaction(storage): @@ -105,8 +103,7 @@ class LazyContent(Content): def with_data(self): - raw_data = data.content.data - return Content.from_dict({**self.to_dict(), "data": raw_data}) + return Content.from_dict({**self.to_dict(), "data": b"42\n"}) class TestStorage: @@ -159,7 +156,7 @@ assert 
         assert swh_storage.check_config(check_write=False)

     def test_content_add(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content

         insertion_start_time = now()
         actual_result = swh_storage.content_add([cont])
@@ -191,7 +188,7 @@
         assert swh_storage.stat_counters()["content"] == 1

     def test_content_add_from_generator(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content

         def _cnt_gen():
             yield cont
@@ -207,7 +204,7 @@
         assert swh_storage.stat_counters()["content"] == 1

     def test_content_add_from_lazy_content(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content
         lazy_content = LazyContent.from_dict(cont.to_dict())

         insertion_start_time = now()
@@ -243,7 +240,7 @@
         assert swh_storage.stat_counters()["content"] == 1

     def test_content_get_missing(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
+        cont, cont2 = sample_data.contents[:2]

         swh_storage.content_add([cont])
@@ -261,7 +258,7 @@
         assert results == [None, {"sha1": cont.sha1, "data": cont.data}]

     def test_content_add_different_input(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
+        cont, cont2 = sample_data.contents[:2]

         actual_result = swh_storage.content_add([cont, cont2])
         assert actual_result == {
@@ -270,7 +267,7 @@
         }

     def test_content_add_twice(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
+        cont, cont2 = sample_data.contents[:2]

         actual_result = swh_storage.content_add([cont])
         assert actual_result == {
@@ -290,7 +287,7 @@
         assert len(swh_storage.content_find(cont2.to_dict())) == 1

     def test_content_add_collision(self, swh_storage, sample_data):
-        cont1 = sample_data["content"][0]
+        cont1 = sample_data.content

         # create (corrupted) content with same sha1{,_git} but != sha256
         sha256_array = bytearray(cont1.sha256)
@@ -317,7 +314,7 @@
         ]

     def test_content_add_duplicate(self, swh_storage, sample_data):
-        cont = sample_data["content"][0]
+        cont = sample_data.content
         swh_storage.content_add([cont, cont])

         assert list(swh_storage.content_get([cont.sha1])) == [
@@ -325,7 +322,7 @@
         ]

     def test_content_update(self, swh_storage, sample_data):
-        cont1 = sample_data["content"][0]
+        cont1 = sample_data.content

         if hasattr(swh_storage, "journal_writer"):
             swh_storage.journal_writer.journal = None  # TODO, not supported
@@ -346,7 +343,7 @@
         assert tuple(results[cont1.sha1]) == (expected_content,)

     def test_content_add_metadata(self, swh_storage, sample_data):
-        cont = attr.evolve(sample_data["content"][0], data=None, ctime=now())
+        cont = attr.evolve(sample_data.content, data=None, ctime=now())

         actual_result = swh_storage.content_add_metadata([cont])
         assert actual_result == {
@@ -370,7 +367,7 @@
         assert obj == cont

     def test_content_add_metadata_different_input(self, swh_storage, sample_data):
-        contents = sample_data["content"][:2]
+        contents = sample_data.contents[:2]
         cont = attr.evolve(contents[0], data=None, ctime=now())
         cont2 = attr.evolve(contents[1], data=None, ctime=now())
@@ -380,7 +377,7 @@
         }

     def test_content_add_metadata_collision(self, swh_storage, sample_data):
-        cont1 = attr.evolve(sample_data["content"][0], data=None, ctime=now())
+        cont1 = attr.evolve(sample_data.content, data=None, ctime=now())

         # create (corrupted) content with same sha1{,_git} but != sha256
         sha1_git_array = bytearray(cont1.sha256)
@@ -407,7 +404,7 @@
         ]

     def test_skipped_content_add(self, swh_storage, sample_data):
-        contents = sample_data["skipped_content"][:2]
+        contents = sample_data.skipped_contents[:2]
         cont = contents[0]
         cont2 = attr.evolve(contents[1], blake2s256=None)
@@ -427,7 +424,7 @@

     def test_skipped_content_add_missing_hashes(self, swh_storage, sample_data):
         cont, cont2 = [
-            attr.evolve(c, sha1_git=None) for c in sample_data["skipped_content"][:2]
+            attr.evolve(c, sha1_git=None) for c in sample_data.skipped_contents[:2]
         ]
         contents_dict = [c.to_dict() for c in [cont, cont2]]
@@ -443,7 +440,7 @@
         assert missing == []

     def test_skipped_content_missing_partial_hash(self, swh_storage, sample_data):
-        cont = sample_data["skipped_content"][0]
+        cont = sample_data.skipped_content
         cont2 = attr.evolve(cont, sha1_git=None)
         contents_dict = [c.to_dict() for c in [cont, cont2]]
@@ -466,9 +463,9 @@
             min_size=0,
         )
     )
-    def test_content_missing(self, swh_storage, algos):
+    def test_content_missing(self, swh_storage, sample_data, algos):
         algos |= {"sha1"}
-        content, missing_content = [data.content2, data.skipped_content]
+        content, missing_content = [sample_data.content2, sample_data.skipped_content]
         swh_storage.content_add([content])

         test_contents = [content.to_dict()]
@@ -496,9 +493,9 @@
             min_size=0,
         )
     )
-    def test_content_missing_unknown_algo(self, swh_storage, algos):
+    def test_content_missing_unknown_algo(self, swh_storage, sample_data, algos):
         algos |= {"sha1"}
-        content, missing_content = [data.content2, data.skipped_content]
+        content, missing_content = [sample_data.content2, sample_data.skipped_content]
         swh_storage.content_add([content])

         test_contents = [content.to_dict()]
@@ -522,8 +519,8 @@

     def test_content_missing_per_sha1(self, swh_storage, sample_data):
         # given
-        cont = sample_data["content"][0]
-        missing_cont = sample_data["skipped_content"][0]
+        cont = sample_data.content
+        missing_cont = sample_data.skipped_content
         swh_storage.content_add([cont])

         # when
@@ -532,8 +529,8 @@
         assert list(gen) == [missing_cont.sha1]

     def test_content_missing_per_sha1_git(self, swh_storage, sample_data):
-        cont, cont2 = sample_data["content"][:2]
-        missing_cont = sample_data["skipped_content"][0]
+        cont, cont2 = sample_data.contents[:2]
+        missing_cont = sample_data.skipped_content

         swh_storage.content_add([cont, cont2])
@@ -618,7 +615,7 @@
         assert_contents_ok(expected_contents, actual_contents, ["sha1"])

     def test_content_get_metadata(self, swh_storage, sample_data):
-        cont1, cont2 = sample_data["content"][:2]
+        cont1, cont2 = sample_data.contents[:2]

         swh_storage.content_add([cont1, cont2])
@@ -636,8 +633,8 @@
         assert len(actual_md.keys()) == 2

     def test_content_get_metadata_missing_sha1(self, swh_storage, sample_data):
-        cont1, cont2 = sample_data["content"][:2]
-        missing_cont = sample_data["skipped_content"][0]
+        cont1, cont2 = sample_data.contents[:2]
+        missing_cont = sample_data.skipped_content

         swh_storage.content_add([cont1, cont2])
@@ -647,7 +644,7 @@
         assert tuple(actual_contents[missing_cont.sha1]) == ()

     def test_content_get_random(self, swh_storage, sample_data):
-        cont, cont2, cont3 = sample_data["content"][:3]
+        cont, cont2, cont3 = sample_data.contents[:3]
         swh_storage.content_add([cont, cont2, cont3])

         assert swh_storage.content_get_random() in {
@@ -657,7 +654,7 @@
         }

     def test_directory_add(self, swh_storage, sample_data):
-        directory = sample_data["directory"][1]
+        directory = sample_data.directories[1]

         init_missing = list(swh_storage.directory_missing([directory.id]))
         assert [directory.id] == init_missing
@@ -681,7 +678,7 @@
         assert swh_storage.stat_counters()["directory"] == 1

     def test_directory_add_from_generator(self, swh_storage, sample_data):
-        directory = sample_data["directory"][1]
sample_data["directory"][1] + directory = sample_data.directories[1] def _dir_gen(): yield directory @@ -697,7 +694,7 @@ assert swh_storage.stat_counters()["directory"] == 1 def test_directory_add_twice(self, swh_storage, sample_data): - directory = sample_data["directory"][1] + directory = sample_data.directories[1] actual_result = swh_storage.directory_add([directory]) assert actual_result == {"directory:add": 1} @@ -714,7 +711,7 @@ ] def test_directory_get_recursive(self, swh_storage, sample_data): - dir1, dir2, dir3 = sample_data["directory"][:3] + dir1, dir2, dir3 = sample_data.directories[:3] init_missing = list(swh_storage.directory_missing([dir1.id])) assert init_missing == [dir1.id] @@ -749,7 +746,7 @@ assert sorted(expected_data, key=cmpdir) == sorted(actual_data, key=cmpdir) def test_directory_get_non_recursive(self, swh_storage, sample_data): - dir1, dir2, dir3 = sample_data["directory"][:3] + dir1, dir2, dir3 = sample_data.directories[:3] init_missing = list(swh_storage.directory_missing([dir1.id])) assert init_missing == [dir1.id] @@ -780,8 +777,8 @@ assert sorted(expected_data, key=cmpdir) == sorted(actual_data, key=cmpdir) def test_directory_entry_get_by_path(self, swh_storage, sample_data): - cont = sample_data["content"][0] - dir1, dir2, dir3, dir4, dir5 = sample_data["directory"][:5] + cont = sample_data.content + dir1, dir2, dir3, dir4, dir5 = sample_data.directories[:5] # given init_missing = list(swh_storage.directory_missing([dir3.id])) @@ -853,7 +850,7 @@ assert actual_entry is None def test_directory_get_random(self, swh_storage, sample_data): - dir1, dir2, dir3 = sample_data["directory"][:3] + dir1, dir2, dir3 = sample_data.directories[:3] swh_storage.directory_add([dir1, dir2, dir3]) assert swh_storage.directory_get_random() in { @@ -863,7 +860,7 @@ } def test_revision_add(self, swh_storage, sample_data): - revision = sample_data["revision"][0] + revision = sample_data.revision init_missing = swh_storage.revision_missing([revision.id]) assert list(init_missing) == [revision.id] @@ -885,7 +882,7 @@ assert swh_storage.stat_counters()["revision"] == 1 def test_revision_add_from_generator(self, swh_storage, sample_data): - revision = sample_data["revision"][0] + revision = sample_data.revision def _rev_gen(): yield revision @@ -897,7 +894,7 @@ assert swh_storage.stat_counters()["revision"] == 1 def test_revision_add_twice(self, swh_storage, sample_data): - revision, revision2 = sample_data["revision"][:2] + revision, revision2 = sample_data.revisions[:2] actual_result = swh_storage.revision_add([revision]) assert actual_result == {"revision:add": 1} @@ -915,7 +912,7 @@ ] def test_revision_add_name_clash(self, swh_storage, sample_data): - revision, revision2 = sample_data["revision"][:2] + revision, revision2 = sample_data.revisions[:2] revision1 = attr.evolve( revision, @@ -937,7 +934,7 @@ assert actual_result == {"revision:add": 2} def test_revision_get_order(self, swh_storage, sample_data): - revision, revision2 = sample_data["revision"][:2] + revision, revision2 = sample_data.revisions[:2] add_result = swh_storage.revision_add([revision, revision2]) assert add_result == {"revision:add": 2} @@ -952,7 +949,7 @@ assert [Revision.from_dict(r) for r in res2] == [revision2, revision] def test_revision_log(self, swh_storage, sample_data): - revision1, revision2, revision3, revision4 = sample_data["revision"][:4] + revision1, revision2, revision3, revision4 = sample_data.revisions[:4] # rev4 -is-child-of-> rev3 -> rev1, (rev2 -> rev1) swh_storage.revision_add([revision1, 
@@ -966,7 +963,7 @@
         assert actual_results == [revision4, revision3, revision1, revision2]

     def test_revision_log_with_limit(self, swh_storage, sample_data):
-        revision1, revision2, revision3, revision4 = sample_data["revision"][:4]
+        revision1, revision2, revision3, revision4 = sample_data.revisions[:4]

         # revision4 -is-child-of-> revision3
         swh_storage.revision_add([revision3, revision4])
@@ -977,12 +974,12 @@
         assert actual_results[0] == revision4

     def test_revision_log_unknown_revision(self, swh_storage, sample_data):
-        revision = sample_data["revision"][0]
+        revision = sample_data.revision
         rev_log = list(swh_storage.revision_log([revision.id]))
         assert rev_log == []

     def test_revision_shortlog(self, swh_storage, sample_data):
-        revision1, revision2, revision3, revision4 = sample_data["revision"][:4]
+        revision1, revision2, revision3, revision4 = sample_data.revisions[:4]

         # rev4 -is-child-of-> rev3 -> (rev1, rev2); rev2 -> rev1
         swh_storage.revision_add([revision1, revision2, revision3, revision4])
@@ -999,7 +996,7 @@
         ]

     def test_revision_shortlog_with_limit(self, swh_storage, sample_data):
-        revision1, revision2, revision3, revision4 = sample_data["revision"][:4]
+        revision1, revision2, revision3, revision4 = sample_data.revisions[:4]

         # revision4 -is-child-of-> revision3
         swh_storage.revision_add([revision1, revision2, revision3, revision4])
@@ -1010,7 +1007,7 @@
         assert list(actual_results[0]) == [revision4.id, revision4.parents]

     def test_revision_get(self, swh_storage, sample_data):
-        revision, revision2 = sample_data["revision"][:2]
+        revision, revision2 = sample_data.revisions[:2]

         swh_storage.revision_add([revision])
@@ -1021,7 +1018,7 @@
         assert actual_revisions[1] is None

     def test_revision_get_no_parents(self, swh_storage, sample_data):
-        revision = sample_data["revision"][0]
+        revision = sample_data.revision

         swh_storage.revision_add([revision])

         get = list(swh_storage.revision_get([revision.id]))
@@ -1031,7 +1028,7 @@
         assert tuple(get[0]["parents"]) == ()  # no parents on this one

     def test_revision_get_random(self, swh_storage, sample_data):
-        revision1, revision2, revision3 = sample_data["revision"][:3]
+        revision1, revision2, revision3 = sample_data.revisions[:3]

         swh_storage.revision_add([revision1, revision2, revision3])
@@ -1042,7 +1039,7 @@
         }

     def test_release_add(self, swh_storage, sample_data):
-        release, release2 = sample_data["release"][:2]
+        release, release2 = sample_data.releases[:2]

         init_missing = swh_storage.release_missing([release.id, release2.id])
         assert list(init_missing) == [release.id, release2.id]
@@ -1066,7 +1063,7 @@
         assert swh_storage.stat_counters()["release"] == 2

     def test_release_add_from_generator(self, swh_storage, sample_data):
-        release, release2 = sample_data["release"][:2]
+        release, release2 = sample_data.releases[:2]

         def _rel_gen():
             yield release
@@ -1084,7 +1081,7 @@
         assert swh_storage.stat_counters()["release"] == 2

     def test_release_add_no_author_date(self, swh_storage, sample_data):
-        full_release = sample_data["release"][0]
+        full_release = sample_data.release

         release = attr.evolve(full_release, author=None, date=None)
         actual_result = swh_storage.release_add([release])
@@ -1098,7 +1095,7 @@
         ]

     def test_release_add_twice(self, swh_storage, sample_data):
-        release, release2 = sample_data["release"][:2]
+        release, release2 = sample_data.releases[:2]

         actual_result = swh_storage.release_add([release])
         assert actual_result == {"release:add": 1}
@@ -1124,14 +1121,14 @@
                 email=b"john.doe@example.com",
             ),
         )
-        for c in sample_data["release"][:2]
sample_data["release"][:2] + for c in sample_data.releases[:2] ] actual_result = swh_storage.release_add([release, release2]) assert actual_result == {"release:add": 2} def test_release_get(self, swh_storage, sample_data): - release, release2, release3 = sample_data["release"][:3] + release, release2, release3 = sample_data.releases[:3] # given swh_storage.release_add([release, release2]) @@ -1147,7 +1144,7 @@ assert unknown_releases[0] is None def test_release_get_order(self, swh_storage, sample_data): - release, release2 = sample_data["release"][:2] + release, release2 = sample_data.releases[:2] add_result = swh_storage.release_add([release, release2]) assert add_result == {"release:add": 2} @@ -1161,7 +1158,7 @@ assert list(res2) == [release2.to_dict(), release.to_dict()] def test_release_get_random(self, swh_storage, sample_data): - release, release2, release3 = sample_data["release"][:3] + release, release2, release3 = sample_data.releases[:3] swh_storage.release_add([release, release2, release3]) @@ -1172,7 +1169,7 @@ } def test_origin_add(self, swh_storage, sample_data): - origin, origin2 = sample_data["origin"][:2] + origin, origin2 = sample_data.origins[:2] origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]] assert swh_storage.origin_get([origin_dict])[0] is None @@ -1194,7 +1191,7 @@ assert swh_storage.stat_counters()["origin"] == 2 def test_origin_add_from_generator(self, swh_storage, sample_data): - origin, origin2 = sample_data["origin"][:2] + origin, origin2 = sample_data.origins[:2] origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]] def _ori_gen(): @@ -1218,7 +1215,7 @@ assert swh_storage.stat_counters()["origin"] == 2 def test_origin_add_twice(self, swh_storage, sample_data): - origin, origin2 = sample_data["origin"][:2] + origin, origin2 = sample_data.origins[:2] origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]] add1 = swh_storage.origin_add([origin, origin2]) @@ -1234,7 +1231,7 @@ assert add2 == {"origin:add": 0} def test_origin_get_legacy(self, swh_storage, sample_data): - origin, origin2 = sample_data["origin"][:2] + origin, origin2 = sample_data.origins[:2] origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]] assert swh_storage.origin_get(origin_dict) is None @@ -1244,7 +1241,7 @@ assert actual_origin0["url"] == origin.url def test_origin_get(self, swh_storage, sample_data): - origin, origin2 = sample_data["origin"][:2] + origin, origin2 = sample_data.origins[:2] origin_dict, origin2_dict = [o.to_dict() for o in [origin, origin2]] assert swh_storage.origin_get(origin_dict) is None @@ -1281,18 +1278,24 @@ return visits def test_origin_visit_get_all(self, swh_storage, sample_data): - origin = sample_data["origin"][0] + origin = sample_data.origin swh_storage.origin_add([origin]) visits = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin.url, date=data.date_visit1, type=data.type_visit1, + origin=origin.url, + date=sample_data.date_visit1, + type=sample_data.type_visit1, ), OriginVisit( - origin=origin.url, date=data.date_visit2, type=data.type_visit2, + origin=origin.url, + date=sample_data.date_visit2, + type=sample_data.type_visit2, ), OriginVisit( - origin=origin.url, date=data.date_visit2, type=data.type_visit2, + origin=origin.url, + date=sample_data.date_visit2, + type=sample_data.type_visit2, ), ] ) @@ -1351,7 +1354,7 @@ assert [] == list(swh_storage.origin_visit_get("foo")) def test_origin_visit_get_random(self, swh_storage, sample_data): - origins = sample_data["origin"][:2] + 
+        origins = sample_data.origins[:2]
         swh_storage.origin_add(origins)

         # Add some random visits within the selection range
@@ -1388,7 +1391,7 @@
         assert random_origin_visit["origin"] in [o.url for o in origins]

     def test_origin_visit_get_random_nothing_found(self, swh_storage, sample_data):
-        origins = sample_data["origin"]
+        origins = sample_data.origins
         swh_storage.origin_add(origins)
         visit_type = "hg"
         # Add some visits outside of the random generation selection so nothing
@@ -1415,7 +1418,7 @@
         assert random_origin_visit is None

     def test_origin_get_by_sha1(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         assert swh_storage.origin_get(origin.to_dict()) is None
         swh_storage.origin_add([origin])
@@ -1424,14 +1427,14 @@
         assert origins[0]["url"] == origin.url

     def test_origin_get_by_sha1_not_found(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         assert swh_storage.origin_get(origin.to_dict()) is None
         origins = list(swh_storage.origin_get_by_sha1([sha1(origin.url)]))
         assert len(origins) == 1
         assert origins[0] is None

     def test_origin_search_single_result(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]

         found_origins = list(swh_storage.origin_search(origin.url))
         assert len(found_origins) == 0
@@ -1465,7 +1468,7 @@
         assert found_origins[0] == origin2_data

     def test_origin_search_no_regexp(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dicts = [o.to_dict() for o in [origin, origin2]]

         swh_storage.origin_add([origin, origin2])
@@ -1488,7 +1491,7 @@
         assert found_origins0 != found_origins1

     def test_origin_search_regexp_substring(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dicts = [o.to_dict() for o in [origin, origin2]]

         swh_storage.origin_add([origin, origin2])
@@ -1515,7 +1518,7 @@
         assert found_origins0 != found_origins1

     def test_origin_search_regexp_fullstring(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
+        origin, origin2 = sample_data.origins[:2]
         origin_dicts = [o.to_dict() for o in [origin, origin2]]

         swh_storage.origin_add([origin, origin2])
@@ -1542,7 +1545,7 @@
         assert found_origins0 != found_origins1

     def test_origin_visit_add(self, swh_storage, sample_data):
-        origin1 = sample_data["origin"][1]
+        origin1 = sample_data.origins[1]
         swh_storage.origin_add([origin1])

         date_visit = now()
@@ -1552,10 +1555,10 @@
         date_visit2 = round_to_milliseconds(date_visit2)

         visit1 = OriginVisit(
-            origin=origin1.url, date=date_visit, type=data.type_visit1,
+            origin=origin1.url, date=date_visit, type=sample_data.type_visit1,
         )
         visit2 = OriginVisit(
-            origin=origin1.url, date=date_visit2, type=data.type_visit2,
+            origin=origin1.url, date=date_visit2, type=sample_data.type_visit2,
         )

         # add once
@@ -1601,11 +1604,9 @@
         for obj in expected_objects:
             assert obj in actual_objects

-    def test_origin_visit_add_validation(self, swh_storage):
+    def test_origin_visit_add_validation(self, swh_storage, sample_data):
         """Unknown origin when adding visits should raise"""
-        visit = OriginVisit(
-            origin="something-unknown", date=now(), type=data.type_visit1,
-        )
+        visit = attr.evolve(sample_data.origin_visit, origin="something-unknown")

         with pytest.raises(StorageArgumentException, match="Unknown origin"):
             swh_storage.origin_visit_add([visit])
@@ -1632,18 +1633,22 @@
         """Correct origin visit statuses should add a new visit status

         """
-        snapshot = sample_data["snapshot"][0]
-        origin1 = sample_data["origin"][1]
+        snapshot = sample_data.snapshot
+        origin1 = sample_data.origins[1]
         origin2 = Origin(url="new-origin")
         swh_storage.origin_add([origin1, origin2])
         ov1, ov2 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin1.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 ),
                 OriginVisit(
-                    origin=origin2.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin2.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 ),
             ]
         )
@@ -1651,14 +1656,14 @@
         ovs1 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov1.visit,
-            date=data.date_visit1,
+            date=sample_data.date_visit1,
             status="created",
             snapshot=None,
         )
         ovs2 = OriginVisitStatus(
             origin=origin2.url,
             visit=ov2.visit,
-            date=data.date_visit2,
+            date=sample_data.date_visit2,
             status="created",
             snapshot=None,
         )
@@ -1718,13 +1723,15 @@
         """Correct origin visit statuses should add a new visit status

         """
-        snapshot = sample_data["snapshot"][0]
-        origin1 = sample_data["origin"][1]
+        snapshot = sample_data.snapshot
+        origin1 = sample_data.origins[1]
         swh_storage.origin_add([origin1])
         ov1 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin1.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 ),
             ]
         )[0]
@@ -1732,7 +1739,7 @@
         ovs1 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov1.visit,
-            date=data.date_visit1,
+            date=sample_data.date_visit1,
             status="created",
             snapshot=None,
         )
@@ -1774,61 +1781,73 @@
         assert obj in actual_objects

     def test_origin_visit_find_by_date(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         visit1 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit1,
         )
         visit2 = OriginVisit(
-            origin=origin.url, date=data.date_visit3, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit3,
+            type=sample_data.type_visit2,
         )
         visit3 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit3,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit3,
         )
         ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])

         ovs1 = OriginVisitStatus(
             origin=origin.url,
             visit=ov1.visit,
-            date=data.date_visit2,
+            date=sample_data.date_visit2,
             status="ongoing",
             snapshot=None,
         )
         ovs2 = OriginVisitStatus(
             origin=origin.url,
             visit=ov2.visit,
-            date=data.date_visit3,
+            date=sample_data.date_visit3,
             status="ongoing",
             snapshot=None,
         )
         ovs3 = OriginVisitStatus(
             origin=origin.url,
             visit=ov3.visit,
-            date=data.date_visit2,
+            date=sample_data.date_visit2,
             status="ongoing",
             snapshot=None,
         )
         swh_storage.origin_visit_status_add([ovs1, ovs2, ovs3])

         # Simple case
-        visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit3)
+        visit = swh_storage.origin_visit_find_by_date(
+            origin.url, sample_data.date_visit3
+        )
         assert visit["visit"] == ov2.visit

         # There are two visits at the same date, the latest must be returned
-        visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit2)
+        visit = swh_storage.origin_visit_find_by_date(
+            origin.url, sample_data.date_visit2
+        )
         assert visit["visit"] == ov3.visit

-    def test_origin_visit_find_by_date__unknown_origin(self, swh_storage):
-        swh_storage.origin_visit_find_by_date("foo", data.date_visit2)
+    def test_origin_visit_find_by_date__unknown_origin(self, swh_storage, sample_data):
+        swh_storage.origin_visit_find_by_date("foo", sample_data.date_visit2)

     def test_origin_visit_get_by(self, swh_storage, sample_data):
-        snapshot = sample_data["snapshot"][0]
-        origins = sample_data["origin"][:2]
+        snapshot = sample_data.snapshot
+        origins = sample_data.origins[:2]
         swh_storage.origin_add(origins)
         origin_url, origin_url2 = [o.url for o in origins]

         visit = OriginVisit(
-            origin=origin_url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin_url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )
         origin_visit1 = swh_storage.origin_visit_add([visit])[0]
@@ -1847,10 +1866,14 @@

         # Add some other {origin, visit} entries
         visit2 = OriginVisit(
-            origin=origin_url, date=data.date_visit3, type=data.type_visit3,
+            origin=origin_url,
+            date=sample_data.date_visit3,
+            type=sample_data.type_visit3,
         )
         visit3 = OriginVisit(
-            origin=origin_url2, date=data.date_visit3, type=data.type_visit3,
+            origin=origin_url2,
+            date=sample_data.date_visit3,
+            type=sample_data.type_visit3,
         )
         swh_storage.origin_visit_add([visit2, visit3])
@@ -1878,8 +1901,8 @@
         {
             "origin": origin_url,
             "visit": origin_visit1.visit,
-            "date": data.date_visit2,
-            "type": data.type_visit2,
+            "date": sample_data.date_visit2,
+            "type": sample_data.type_visit2,
             "metadata": visit1_metadata,
             "status": "full",
             "snapshot": snapshot.id,
@@ -1898,7 +1921,7 @@
         assert swh_storage.origin_visit_get_by("foo", 10) is None

     def test_origin_visit_get_by_no_result(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         actual_origin_visit = swh_storage.origin_visit_get_by(origin.url, 999)
         assert actual_origin_visit is None
@@ -1911,7 +1934,7 @@
         assert swh_storage.origin_visit_get_latest("unknown-origin") is None

         # unknown type
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         assert swh_storage.origin_visit_get_latest(origin.url, type="unknown") is None
@@ -1919,41 +1942,47 @@
         """Filtering origin visit get latest with filter type should be ok

         """
-        origin = sample_data["origin"][0]
+        origin = sample_data.origin
         swh_storage.origin_add([origin])
         visit1 = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         visit2 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
        )
         # Add a visit with the same date as the previous one
         visit3 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )

-        assert data.type_visit1 != data.type_visit2
-        assert data.date_visit1 < data.date_visit2
+        assert sample_data.type_visit1 != sample_data.type_visit2
+        assert sample_data.date_visit1 < sample_data.date_visit2

         ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])

         origin_visit1 = swh_storage.origin_visit_get_by(origin.url, ov1.visit)
         origin_visit3 = swh_storage.origin_visit_get_by(origin.url, ov3.visit)

-        assert data.type_visit1 != data.type_visit2
+        assert sample_data.type_visit1 != sample_data.type_visit2

         # Check type filter is ok
         actual_ov1 = swh_storage.origin_visit_get_latest(
-            origin.url, type=data.type_visit1,
+            origin.url, type=sample_data.type_visit1,
         )
         assert actual_ov1 == origin_visit1

         actual_ov3 = swh_storage.origin_visit_get_latest(
-            origin.url, type=data.type_visit2,
+            origin.url, type=sample_data.type_visit2,
         )
         assert actual_ov3 == origin_visit3

         new_type = "npm"
-        assert new_type not in [data.type_visit1, data.type_visit2]
+        assert new_type not in [sample_data.type_visit1, sample_data.type_visit2]

         assert (
             swh_storage.origin_visit_get_latest(
@@ -1963,19 +1992,25 @@
         )

     def test_origin_visit_get_latest(self, swh_storage, sample_data):
-        empty_snapshot, complete_snapshot = sample_data["snapshot"][1:3]
-        origin = sample_data["origin"][0]
+        empty_snapshot, complete_snapshot = sample_data.snapshots[1:3]
+        origin = sample_data.origin

         swh_storage.origin_add([origin])
         visit1 = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         visit2 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )
         # Add a visit with the same date as the previous one
         visit3 = OriginVisit(
-            origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+            origin=origin.url,
+            date=sample_data.date_visit2,
+            type=sample_data.type_visit2,
         )

         ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
@@ -2110,8 +2145,8 @@
         } == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)

     def test_origin_visit_status_get_latest(self, swh_storage, sample_data):
-        snapshot = sample_data["snapshot"][2]
-        origin1 = sample_data["origin"][0]
+        snapshot = sample_data.snapshots[2]
+        origin1 = sample_data.origin
         swh_storage.origin_add([origin1])

         # to have some reference visits
@@ -2119,10 +2154,14 @@
         ov1, ov2 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin1.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 ),
                 OriginVisit(
-                    origin=origin1.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin1.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 ),
             ]
         )
@@ -2130,27 +2169,28 @@
         date_now = now()
         date_now = round_to_milliseconds(date_now)

-        assert data.date_visit1 < data.date_visit2
-        assert data.date_visit2 < date_now
+        assert sample_data.date_visit1 < sample_data.date_visit2
+        assert sample_data.date_visit2 < date_now

         ovs1 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov1.visit,
-            date=data.date_visit1,
+            date=sample_data.date_visit1,
             status="partial",
             snapshot=None,
         )
         ovs2 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov1.visit,
-            date=data.date_visit2,
+            date=sample_data.date_visit2,
             status="ongoing",
             snapshot=None,
         )
         ovs3 = OriginVisitStatus(
             origin=origin1.url,
             visit=ov2.visit,
-            date=data.date_visit2 + datetime.timedelta(minutes=1),  # to not be ignored
+            date=sample_data.date_visit2
+            + datetime.timedelta(minutes=1),  # to not be ignored
             status="ongoing",
             snapshot=None,
         )
@@ -2217,7 +2257,7 @@
         assert actual_origin_visit3 == ovs3

     def test_person_fullname_unicity(self, swh_storage, sample_data):
-        revision, rev2 = sample_data["revision"][0:2]
+        revision, rev2 = sample_data.revisions[0:2]
         # create a revision with same committer fullname but wo name and email
         revision2 = attr.evolve(
             rev2,
@@ -2235,15 +2275,17 @@
         assert revisions[0]["committer"] == revisions[1]["committer"]

     def test_snapshot_add_get_empty(self, swh_storage, sample_data):
-        empty_snapshot = sample_data["snapshot"][1]
empty_snapshot = sample_data["snapshot"][1] + empty_snapshot = sample_data.snapshots[1] empty_snapshot_dict = empty_snapshot.to_dict() - origin = sample_data["origin"][0] + origin = sample_data.origin swh_storage.origin_add([origin]) ov1 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin.url, date=data.date_visit1, type=data.type_visit1, + origin=origin.url, + date=sample_data.date_visit1, + type=sample_data.type_visit1, ) ] )[0] @@ -2274,7 +2316,7 @@ ovs1 = OriginVisitStatus.from_dict( { "origin": origin.url, - "date": data.date_visit1, + "date": sample_data.date_visit1, "visit": ov1.visit, "status": "created", "snapshot": None, @@ -2304,13 +2346,15 @@ assert obj in actual_objects def test_snapshot_add_get_complete(self, swh_storage, sample_data): - complete_snapshot = sample_data["snapshot"][2] + complete_snapshot = sample_data.snapshots[2] complete_snapshot_dict = complete_snapshot.to_dict() - origin = sample_data["origin"][0] + origin = sample_data.origin swh_storage.origin_add([origin]) visit = OriginVisit( - origin=origin.url, date=data.date_visit1, type=data.type_visit1, + origin=origin.url, + date=sample_data.date_visit1, + type=sample_data.type_visit1, ) origin_visit1 = swh_storage.origin_visit_add([visit])[0] visit_id = origin_visit1.visit @@ -2336,7 +2380,7 @@ assert by_ov == {**complete_snapshot_dict, "next_branch": None} def test_snapshot_add_many(self, swh_storage, sample_data): - snapshot, _, complete_snapshot = sample_data["snapshot"][:3] + snapshot, _, complete_snapshot = sample_data.snapshots[:3] actual_result = swh_storage.snapshot_add([snapshot, complete_snapshot]) assert actual_result == {"snapshot:add": 2} @@ -2355,7 +2399,7 @@ assert swh_storage.stat_counters()["snapshot"] == 2 def test_snapshot_add_many_from_generator(self, swh_storage, sample_data): - snapshot, _, complete_snapshot = sample_data["snapshot"][:3] + snapshot, _, complete_snapshot = sample_data.snapshots[:3] def _snp_gen(): yield from [snapshot, complete_snapshot] @@ -2367,7 +2411,7 @@ assert swh_storage.stat_counters()["snapshot"] == 2 def test_snapshot_add_many_incremental(self, swh_storage, sample_data): - snapshot, _, complete_snapshot = sample_data["snapshot"][:3] + snapshot, _, complete_snapshot = sample_data.snapshots[:3] actual_result = swh_storage.snapshot_add([complete_snapshot]) assert actual_result == {"snapshot:add": 1} @@ -2386,7 +2430,7 @@ } def test_snapshot_add_twice(self, swh_storage, sample_data): - snapshot, empty_snapshot = sample_data["snapshot"][:2] + snapshot, empty_snapshot = sample_data.snapshots[:2] actual_result = swh_storage.snapshot_add([empty_snapshot]) assert actual_result == {"snapshot:add": 1} @@ -2404,7 +2448,7 @@ ] def test_snapshot_add_count_branches(self, swh_storage, sample_data): - complete_snapshot = sample_data["snapshot"][2] + complete_snapshot = sample_data.snapshots[2] actual_result = swh_storage.snapshot_add([complete_snapshot]) assert actual_result == {"snapshot:add": 1} @@ -2423,7 +2467,7 @@ assert snp_size == expected_snp_size def test_snapshot_add_get_paginated(self, swh_storage, sample_data): - complete_snapshot = sample_data["snapshot"][2] + complete_snapshot = sample_data.snapshots[2] swh_storage.snapshot_add([complete_snapshot]) @@ -2471,12 +2515,14 @@ assert snapshot == expected_snapshot def test_snapshot_add_get_filtered(self, swh_storage, sample_data): - origin = sample_data["origin"][0] - complete_snapshot = sample_data["snapshot"][2] + origin = sample_data.origin + complete_snapshot = sample_data.snapshots[2] 

         swh_storage.origin_add([origin])
         visit = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         origin_visit1 = swh_storage.origin_visit_add([visit])[0]
@@ -2527,7 +2573,7 @@
         assert snapshot == expected_snapshot

     def test_snapshot_add_get_filtered_and_paginated(self, swh_storage, sample_data):
-        complete_snapshot = sample_data["snapshot"][2]
+        complete_snapshot = sample_data.snapshots[2]

         swh_storage.snapshot_add([complete_snapshot])
@@ -2596,7 +2642,7 @@
         assert snapshot == expected_snapshot

     def test_snapshot_add_get_branch_by_type(self, swh_storage, sample_data):
-        complete_snapshot = sample_data["snapshot"][2]
+        complete_snapshot = sample_data.snapshots[2]
         snapshot = complete_snapshot.to_dict()

         alias1 = b"alias1"
@@ -2628,12 +2674,14 @@
         assert alias1 in branches

     def test_snapshot_add_get(self, swh_storage, sample_data):
-        snapshot = sample_data["snapshot"][0]
-        origin = sample_data["origin"][0]
+        snapshot = sample_data.snapshot
+        origin = sample_data.origin

         swh_storage.origin_add([origin])
         visit = OriginVisit(
-            origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+            origin=origin.url,
+            date=sample_data.date_visit1,
+            type=sample_data.type_visit1,
         )
         origin_visit1 = swh_storage.origin_visit_add([visit])[0]
         visit_id = origin_visit1.visit
@@ -2663,14 +2711,16 @@
         assert origin_visit_info["snapshot"] == snapshot.id

     def test_snapshot_add_twice__by_origin_visit(self, swh_storage, sample_data):
-        snapshot = sample_data["snapshot"][0]
-        origin = sample_data["origin"][0]
+        snapshot = sample_data.snapshot
+        origin = sample_data.origin

         swh_storage.origin_add([origin])
         ov1 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit1, type=data.type_visit1,
+                    origin=origin.url,
+                    date=sample_data.date_visit1,
+                    type=sample_data.type_visit1,
                 )
             ]
         )[0]
@@ -2697,7 +2747,9 @@
         ov2 = swh_storage.origin_visit_add(
             [
                 OriginVisit(
-                    origin=origin.url, date=data.date_visit2, type=data.type_visit2,
+                    origin=origin.url,
+                    date=sample_data.date_visit2,
+                    type=sample_data.type_visit2,
                 )
             ]
         )[0]
@@ -2721,7 +2773,7 @@
         ovs1 = OriginVisitStatus.from_dict(
             {
                 "origin": origin.url,
-                "date": data.date_visit1,
+                "date": sample_data.date_visit1,
                 "visit": ov1.visit,
                 "status": "created",
                 "metadata": None,
@@ -2741,7 +2793,7 @@
         ovs3 = OriginVisitStatus.from_dict(
             {
                 "origin": origin.url,
-                "date": data.date_visit2,
+                "date": sample_data.date_visit2,
                 "visit": ov2.visit,
                 "status": "created",
                 "metadata": None,
@@ -2774,7 +2826,7 @@
         assert obj in actual_objects

     def test_snapshot_get_random(self, swh_storage, sample_data):
-        snapshot, empty_snapshot, complete_snapshot = sample_data["snapshot"][:3]
+        snapshot, empty_snapshot, complete_snapshot = sample_data.snapshots[:3]
         swh_storage.snapshot_add([snapshot, empty_snapshot, complete_snapshot])

         assert swh_storage.snapshot_get_random() in {
@@ -2784,7 +2836,7 @@
         }

     def test_snapshot_missing(self, swh_storage, sample_data):
-        snapshot, missing_snapshot = sample_data["snapshot"][:2]
+        snapshot, missing_snapshot = sample_data.snapshots[:2]
         snapshots = [snapshot.id, missing_snapshot.id]
         swh_storage.snapshot_add([snapshot])
@@ -2793,12 +2845,12 @@
         assert list(missing_snapshots) == [missing_snapshot.id]

     def test_stat_counters(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        snapshot = sample_data["snapshot"][0]
-        revision = sample_data["revision"][0]
-        release = sample_data["release"][0]
-        directory = sample_data["directory"][0]
sample_data["directory"][0] - content = sample_data["content"][0] + origin = sample_data.origin + snapshot = sample_data.snapshot + revision = sample_data.revision + release = sample_data.release + directory = sample_data.directory + content = sample_data.content expected_keys = ["content", "directory", "origin", "revision"] @@ -2827,7 +2879,9 @@ swh_storage.origin_add([origin]) visit = OriginVisit( - origin=origin.url, date=data.date_visit2, type=data.type_visit2, + origin=origin.url, + date=sample_data.date_visit2, + type=sample_data.type_visit2, ) origin_visit1 = swh_storage.origin_visit_add([visit])[0] @@ -2861,7 +2915,7 @@ assert counters["person"] == 3 def test_content_find_ctime(self, swh_storage, sample_data): - origin_content = sample_data["content"][0] + origin_content = sample_data.content ctime = round_to_milliseconds(now()) content = attr.evolve(origin_content, data=None, ctime=ctime) swh_storage.content_add_metadata([content]) @@ -2870,7 +2924,7 @@ assert actually_present[0] == content.to_dict() def test_content_find_with_present_content(self, swh_storage, sample_data): - content = sample_data["content"][0] + content = sample_data.content expected_content = content.to_dict() del expected_content["data"] del expected_content["ctime"] @@ -2902,7 +2956,7 @@ assert actually_present[0] == expected_content def test_content_find_with_non_present_content(self, swh_storage, sample_data): - missing_content = sample_data["skipped_content"][0] + missing_content = sample_data.skipped_content # 1. with something that does not exist actually_present = swh_storage.content_find({"sha1": missing_content.sha1}) @@ -2919,7 +2973,7 @@ assert actually_present == [] def test_content_find_with_duplicate_input(self, swh_storage, sample_data): - content = sample_data["content"][0] + content = sample_data.content # Create fake data with colliding sha256 and blake2s256 sha1_array = bytearray(content.sha1) @@ -2959,7 +3013,7 @@ assert result in actual_result def test_content_find_with_duplicate_sha256(self, swh_storage, sample_data): - content = sample_data["content"][0] + content = sample_data.content hashes = {} # Create fake data with colliding sha256 @@ -3015,7 +3069,7 @@ assert actual_result == [expected_duplicated_content] def test_content_find_with_duplicate_blake2s256(self, swh_storage, sample_data): - content = sample_data["content"][0] + content = sample_data.content # Create fake data with colliding sha256 and blake2s256 sha1_array = bytearray(content.sha1) @@ -3077,10 +3131,10 @@ swh_storage.content_find({"unknown-sha1": "something"}) # not the right key def test_object_find_by_sha1_git(self, swh_storage, sample_data): - content = sample_data["content"][0] - directory = sample_data["directory"][0] - revision = sample_data["revision"][0] - release = sample_data["release"][0] + content = sample_data.content + directory = sample_data.directory + revision = sample_data.revision + release = sample_data.release sha1_gits = [b"00000000000000000000"] expected = { @@ -3111,7 +3165,7 @@ assert expected == ret def test_metadata_fetcher_add_get(self, swh_storage, sample_data): - fetcher = sample_data["fetcher"][0] + fetcher = sample_data.metadata_fetcher actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version) assert actual_fetcher is None # does not exist @@ -3121,7 +3175,7 @@ assert res == fetcher def test_metadata_authority_add_get(self, swh_storage, sample_data): - authority = sample_data["authority"][0] + authority = sample_data.metadata_authority actual_authority = 
             authority.type, authority.url
@@ -3134,10 +3188,10 @@
         assert res == authority

     def test_content_metadata_add(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata = sample_data.content_metadata[:2]

         content_swhid = SWHID(
             object_type="content", object_id=hash_to_bytes(content.sha1_git)
@@ -3158,10 +3212,10 @@
     def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
         """Duplicates should be silently updated."""
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]
         content_swhid = SWHID(
             object_type="content", object_id=hash_to_bytes(content.sha1_git)
         )
@@ -3190,12 +3244,14 @@
         )

     def test_content_metadata_get(self, swh_storage, sample_data):
-        content, content2 = sample_data["content"][:2]
-        fetcher, fetcher2 = sample_data["fetcher"][:2]
-        authority, authority2 = sample_data["authority"][:2]
-        content1_metadata1, content1_metadata2, content1_metadata3 = sample_data[
-            "content_metadata"
-        ][:3]
+        content, content2 = sample_data.contents[:2]
+        fetcher, fetcher2 = sample_data.fetchers[:2]
+        authority, authority2 = sample_data.authorities[:2]
+        (
+            content1_metadata1,
+            content1_metadata2,
+            content1_metadata3,
+        ) = sample_data.content_metadata[:3]

         content1_swhid = SWHID(object_type="content", object_id=content.sha1_git)
         content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git)
@@ -3236,10 +3292,10 @@
         assert [content2_metadata] == list(result["results"],)

     def test_content_metadata_get_after(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
@@ -3278,10 +3334,10 @@
         assert result["results"] == []

     def test_content_metadata_get_paginate(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
@@ -3309,10 +3365,10 @@
         assert result["results"] == [content_metadata2]

     def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
-        fetcher1, fetcher2 = sample_data["fetcher"][:2]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        content = sample_data.content
+        fetcher1, fetcher2 = sample_data.fetchers[:2]
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
@@ -3344,10 +3400,10 @@
         assert result["results"] == [new_content_metadata2]

     def test_content_metadata_get__invalid_id(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        content_metadata, content_metadata2 = sample_data["content_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        content_metadata, content_metadata2 = sample_data.content_metadata[:2]

         swh_storage.metadata_fetcher_add([fetcher])
         swh_storage.metadata_authority_add([authority])
@@ -3359,10 +3415,10 @@
         )

     def test_origin_metadata_add(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]

         assert swh_storage.origin_add([origin]) == {"origin:add": 1}
@@ -3382,10 +3438,10 @@

     def test_origin_metadata_add_duplicate(self, swh_storage, sample_data):
         """Duplicates should be silently updated."""
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
         assert swh_storage.origin_add([origin]) == {"origin:add": 1}

         new_origin_metadata2 = attr.evolve(
@@ -3413,12 +3469,14 @@
         )

     def test_origin_metadata_get(self, swh_storage, sample_data):
-        origin, origin2 = sample_data["origin"][:2]
-        fetcher, fetcher2 = sample_data["fetcher"][:2]
-        authority, authority2 = sample_data["authority"][:2]
-        origin1_metadata1, origin1_metadata2, origin1_metadata3 = sample_data[
-            "origin_metadata"
-        ][:3]
+        origin, origin2 = sample_data.origins[:2]
+        fetcher, fetcher2 = sample_data.fetchers[:2]
+        authority, authority2 = sample_data.authorities[:2]
+        (
+            origin1_metadata1,
+            origin1_metadata2,
+            origin1_metadata3,
+        ) = sample_data.origin_metadata[:3]

         assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2}
@@ -3454,10 +3512,10 @@
         assert [origin2_metadata] == list(result["results"],)

     def test_origin_metadata_get_after(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
+        origin = sample_data.origin
+        fetcher = sample_data.metadata_fetcher
+        authority = sample_data.metadata_authority
+        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]

         assert swh_storage.origin_add([origin]) == {"origin:add": 1}
@@ -3496,10 +3554,10 @@
         assert result["results"] == []

     def test_origin_metadata_get_paginate(self, swh_storage, sample_data):
-        origin = sample_data["origin"][0]
-        fetcher = sample_data["fetcher"][0]
-        authority = sample_data["authority"][0]
-        origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2]
sample_data["origin_metadata"][:2] + origin = sample_data.origin + fetcher = sample_data.metadata_fetcher + authority = sample_data.metadata_authority + origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] assert swh_storage.origin_add([origin]) == {"origin:add": 1} swh_storage.metadata_fetcher_add([fetcher]) @@ -3528,10 +3586,10 @@ assert result["results"] == [origin_metadata2] def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data): - origin = sample_data["origin"][0] - fetcher1, fetcher2 = sample_data["fetcher"][:2] - authority = sample_data["authority"][0] - origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2] + origin = sample_data.origin + fetcher1, fetcher2 = sample_data.fetchers[:2] + authority = sample_data.metadata_authority + origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] assert swh_storage.origin_add([origin]) == {"origin:add": 1} swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) @@ -3562,9 +3620,9 @@ assert result["results"] == [new_origin_metadata2] def test_origin_metadata_add_missing_authority(self, swh_storage, sample_data): - origin = sample_data["origin"][0] - fetcher = sample_data["fetcher"][0] - origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2] + origin = sample_data.origin + fetcher = sample_data.metadata_fetcher + origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] assert swh_storage.origin_add([origin]) == {"origin:add": 1} swh_storage.metadata_fetcher_add([fetcher]) @@ -3573,9 +3631,9 @@ swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) def test_origin_metadata_add_missing_fetcher(self, swh_storage, sample_data): - origin = sample_data["origin"][0] - authority = sample_data["authority"][0] - origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2] + origin = sample_data.origin + authority = sample_data.metadata_authority + origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] assert swh_storage.origin_add([origin]) == {"origin:add": 1} swh_storage.metadata_authority_add([authority]) @@ -3584,11 +3642,11 @@ swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data): - origin = sample_data["origin"][0] - authority = sample_data["authority"][0] - fetcher = sample_data["fetcher"][0] - origin_metadata, origin_metadata2 = sample_data["origin_metadata"][:2] - content_metadata = sample_data["content_metadata"][0] + origin = sample_data.origin + authority = sample_data.metadata_authority + fetcher = sample_data.metadata_fetcher + origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] + content_metadata = sample_data.content_metadata[0] assert swh_storage.origin_add([origin]) == {"origin:add": 1} swh_storage.metadata_fetcher_add([fetcher]) @@ -3788,7 +3846,7 @@ assert sorted(returned_origins) == sorted(expected_origins) def test_origin_count(self, swh_storage, sample_data): - swh_storage.origin_add(sample_data["origin"]) + swh_storage.origin_add(sample_data.origins) assert swh_storage.origin_count("github") == 3 assert swh_storage.origin_count("gitlab") == 2 @@ -3798,7 +3856,7 @@ assert swh_storage.origin_count(".*user1.*", regexp=False) == 0 def test_origin_count_with_visit_no_visits(self, swh_storage, sample_data): - swh_storage.origin_add(sample_data["origin"]) + swh_storage.origin_add(sample_data.origins) # none of them have visits, so with_visit=True => 0 assert swh_storage.origin_count("github", 
@@ -3811,7 +3869,7 @@
     def test_origin_count_with_visit_with_visits_no_snapshot(
         self, swh_storage, sample_data
     ):
-        swh_storage.origin_add(sample_data["origin"])
+        swh_storage.origin_add(sample_data.origins)

         origin_url = "https://github.com/user1/repo1"
         visit = OriginVisit(origin=origin_url, date=now(), type="git",)
@@ -3837,8 +3895,8 @@
     def test_origin_count_with_visit_with_visits_and_snapshot(
         self, swh_storage, sample_data
     ):
-        snapshot = sample_data["snapshot"][0]
-        swh_storage.origin_add(sample_data["origin"])
+        snapshot = sample_data.snapshot
+        swh_storage.origin_add(sample_data.origins)
         swh_storage.snapshot_add([snapshot])

         origin_url = "https://github.com/user1/repo1"
@@ -3892,7 +3950,7 @@
     # This test is only relevant on the local storage, with an actual
     # objstorage raising an exception
     def test_content_add_objstorage_exception(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         swh_storage.objstorage.content_add = Mock(
             side_effect=Exception("mocked broken objstorage")
@@ -3909,7 +3967,7 @@
 class TestStorageRaceConditions:
     @pytest.mark.xfail
     def test_content_add_race(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         results = queue.Queue()
@@ -3951,7 +4009,7 @@
     """

     def test_content_update_with_new_cols(self, swh_storage, sample_data):
-        content, content2 = sample_data["content"][:2]
+        content, content2 = sample_data.contents[:2]

         swh_storage.journal_writer.journal = None  # TODO, not supported
@@ -3996,7 +4054,7 @@
         )

     def test_content_add_db(self, swh_storage, sample_data):
-        content = sample_data["content"][0]
+        content = sample_data.content

         actual_result = swh_storage.content_add([content])
@@ -4033,7 +4091,7 @@
         assert contents[0] == attr.evolve(content, data=None)

     def test_content_add_metadata_db(self, swh_storage, sample_data):
-        content = attr.evolve(sample_data["content"][0], data=None, ctime=now())
+        content = attr.evolve(sample_data.content, data=None, ctime=now())

         actual_result = swh_storage.content_add_metadata([content])
@@ -4067,7 +4125,7 @@
         assert contents[0] == content

     def test_skipped_content_add_db(self, swh_storage, sample_data):
-        content, cont2 = sample_data["skipped_content"][:2]
+        content, cont2 = sample_data.skipped_contents[:2]
         content2 = attr.evolve(cont2, blake2s256=None)

         actual_result = swh_storage.skipped_content_add([content, content, content2])
diff --git a/swh/storage/tests/test_storage_data.py b/swh/storage/tests/test_storage_data.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/test_storage_data.py
@@ -0,0 +1,29 @@
+# Copyright (C) 2020  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.model.model import BaseModel
+
+from swh.storage.tests.storage_data import StorageData
+
+
+def test_storage_data():
+    data = StorageData()
+
+    for attribute_key in [
+        "contents",
+        "skipped_contents",
+        "directories",
+        "revisions",
+        "releases",
+        "snapshots",
+        "origins",
+        "origin_visits",
+        "fetchers",
+        "authorities",
+        "origin_metadata",
+        "content_metadata",
+    ]:
+        for obj in getattr(data, attribute_key):
+            assert isinstance(obj, BaseModel)
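
Note: this patch relies throughout on the new StorageData class in swh/storage/tests/storage_data.py, whose definition is not part of the diff. For orientation only, here is a minimal, hypothetical sketch of the shape the tests above depend on (plural tuples of model objects plus singular shortcuts and scalar helpers). The attribute names come from the usages in this diff; the sample values below are invented for illustration and are not the project's actual test data:

    # Sketch only -- not the real swh/storage/tests/storage_data.py.
    import datetime
    from typing import Tuple

    from swh.model.model import Origin


    class StorageData:
        """Expose sample objects as plural tuples plus singular shortcuts."""

        origins: Tuple[Origin, ...] = (
            Origin(url="https://github.com/user1/repo1"),
            Origin(url="https://gitlab.com/user2/repo2"),
        )
        # Singular shortcut: the first element of the matching plural tuple,
        # which is why the tests can swap sample_data["origin"][0] for
        # sample_data.origin.
        origin: Origin = origins[0]

        # Scalar helpers referenced by the visit tests (values made up here).
        date_visit1: datetime.datetime = datetime.datetime(
            2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc
        )
        type_visit1: str = "git"


    # Usage mirrors the sample_data fixture in the tests above:
    data = StorageData()
    assert data.origin == data.origins[0]

The same convention would extend to contents/content, snapshots/snapshot, fetchers/metadata_fetcher, authorities/metadata_authority, and so on, which is exactly what the new test_storage_data.py above asserts for the plural attributes.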