diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py --- a/swh/storage/pytest_plugin.py +++ b/swh/storage/pytest_plugin.py @@ -6,7 +6,7 @@ import glob from os import path, environ -from typing import Dict, Iterable, Union +from typing import Dict, Tuple, Union import pytest @@ -16,20 +16,7 @@ from pytest_postgresql.janitor import DatabaseJanitor, psycopg2, Version from swh.core.utils import numfile_sortkey as sortkey -from swh.model.model import ( - BaseModel, - Content, - Directory, - MetadataAuthority, - MetadataFetcher, - Origin, - OriginVisit, - RawExtrinsicMetadata, - Release, - Revision, - SkippedContent, - Snapshot, -) +from swh.model.model import BaseModel from swh.storage import get_storage from swh.storage.tests.storage_data import data @@ -201,75 +188,25 @@ @pytest.fixture -def sample_data() -> Dict: +def sample_data_model() -> Dict[str, Tuple[BaseModel, ...]]: """Pre-defined sample storage object data to manipulate Returns: - Dict of data (keys: content, directory, revision, release, person, + Dict of data model objects (keys: content, directory, revision, release, person, origin) """ return { - "content": [data.content, data.content2, data.content3], - "skipped_content": [data.skipped_content, data.skipped_content2], - "directory": [ - data.directory2, - data.directory, - data.directory3, - data.directory4, - data.directory5, - ], - "revision": [data.revision, data.revision2, data.revision3, data.revision4], + "content": data.contents, + "skipped_content": data.skipped_contents, + "directory": data.directories, + "revision": data.revisions, "release": data.releases, "snapshot": data.snapshots, "origin": data.origins, "origin_visit": data.origin_visits, - "fetcher": [data.metadata_fetcher, data.metadata_fetcher2], - "authority": [data.metadata_authority, data.metadata_authority2], - "origin_metadata": [ - data.origin_metadata, - data.origin_metadata2, - data.origin_metadata3, - ], - "content_metadata": [ - data.content_metadata, - data.content_metadata2, - data.content_metadata3, - ], - } - - -# FIXME: Add the metadata keys when we can (right now, we cannot as the data model -# changed but not the endpoints yet) -OBJECT_FACTORY = { - "content": Content.from_dict, - "skipped_content": SkippedContent.from_dict, - "directory": Directory.from_dict, - "revision": Revision.from_dict, - "release": Release.from_dict, - "snapshot": Snapshot.from_dict, - "origin": Origin.from_dict, - "origin_visit": OriginVisit.from_dict, - "fetcher": MetadataFetcher.from_dict, - "authority": MetadataAuthority.from_dict, - "origin_metadata": RawExtrinsicMetadata.from_dict, - "content_metadata": RawExtrinsicMetadata.from_dict, -} - - -@pytest.fixture -def sample_data_model(sample_data) -> Dict[str, Iterable[BaseModel]]: - """Pre-defined sample storage object model to manipulate - - Returns: - Dict of data (keys: content, directory, revision, release, person, origin, ...) - values list of object data model with the corresponding types - - """ - return { - object_type: [ - convert_fn(obj) if isinstance(obj, dict) else obj - for obj in sample_data[object_type] - ] - for object_type, convert_fn in OBJECT_FACTORY.items() + "fetcher": data.fetchers, + "authority": data.authorities, + "origin_metadata": data.origin_metadata, + "content_metadata": data.content_metadata, } diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -92,20 +92,8 @@ ctime=datetime.datetime(2019, 12, 1, tzinfo=datetime.timezone.utc), ) +contents = (content, content2, content3) -missing_content = Content( - data=b"something missing", - length=8, - sha1=hash_to_bytes("f9c24e2abb82063a3ba2c44efd2d3c797f28ac90"), - sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8919"), - sha256=hash_to_bytes( - "6bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a" - ), - blake2s256=hash_to_bytes( - "306856b8fd879edb7b6f1aeaaf8db9bbecc993cd7f776c333ac3a782fa5c6eba" - ), - status="visible", -) skipped_content = SkippedContent( length=1024 * 1024 * 200, @@ -136,6 +124,7 @@ status="absent", ) +skipped_contents = (skipped_content, skipped_content2) directory5 = Directory(entries=()) @@ -213,6 +202,8 @@ ), ) +directories = (directory2, directory, directory3, directory4, directory5) + minus_offset = datetime.timezone(datetime.timedelta(minutes=-120)) plus_offset = datetime.timezone(datetime.timedelta(minutes=120)) @@ -339,14 +330,16 @@ synthetic=False, ) -origins = [ +revisions = (revision, revision2, revision3, revision4) + +origins = ( Origin(url="https://github.com/user1/repo1"), Origin(url="https://github.com/user2/repo1"), Origin(url="https://github.com/user3/repo1"), Origin(url="https://gitlab.com/user1/repo1"), Origin(url="https://gitlab.com/user2/repo1"), Origin(url="https://forge.softwareheritage.org/source/repo1"), -] +) origin, origin2 = origins[:2] @@ -361,11 +354,15 @@ metadata={}, ) +authorities = (metadata_authority, metadata_authority2) + metadata_fetcher = MetadataFetcher( name="swh-deposit", version="0.0.1", metadata={"sword_version": "2"}, ) metadata_fetcher2 = MetadataFetcher(name="swh-example", version="0.0.1", metadata={},) +fetchers = (metadata_fetcher, metadata_fetcher2) + date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) type_visit1 = "git" @@ -387,7 +384,7 @@ origin=origin2.url, visit=1, date=date_visit1, type=type_visit2, ) -origin_visits = [origin_visit, origin_visit2, origin_visit3] +origin_visits = (origin_visit, origin_visit2, origin_visit3) release = Release( id=hash_to_bytes("a673e617fcc6234e29b2cad06b8245f96c415c61"), @@ -440,7 +437,7 @@ synthetic=True, ) -releases = [release, release2, release3] +releases = (release, release2, release3) snapshot = Snapshot( id=hash_to_bytes("409ee1ff3f10d166714bc90581debfd0446dda57"), @@ -477,9 +474,9 @@ }, ) -snapshots = [snapshot, empty_snapshot, complete_snapshot] +snapshots = (snapshot, empty_snapshot, complete_snapshot) -content_metadata = RawExtrinsicMetadata( +content_metadata1 = RawExtrinsicMetadata( type=MetadataTargetType.CONTENT, id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), origin=origin.url, @@ -522,7 +519,13 @@ path=b"/foo/bar", ) -origin_metadata = RawExtrinsicMetadata( +content_metadata = ( + content_metadata1, + content_metadata2, + content_metadata3, +) + +origin_metadata1 = RawExtrinsicMetadata( type=MetadataTargetType.ORIGIN, id=origin.url, discovery_date=datetime.datetime( @@ -555,3 +558,9 @@ format="yaml", metadata=b"foo: bar", ) + +origin_metadata = ( + origin_metadata1, + origin_metadata2, + origin_metadata3, +) diff --git a/swh/storage/tests/test_pytest_plugin.py b/swh/storage/tests/test_pytest_plugin.py --- a/swh/storage/tests/test_pytest_plugin.py +++ b/swh/storage/tests/test_pytest_plugin.py @@ -4,36 +4,11 @@ # See top-level LICENSE file for more information -from swh.storage.pytest_plugin import OBJECT_FACTORY - - from swh.model.model import BaseModel from swh.storage.interface import StorageInterface -def test_sample_data(sample_data, sample_data_model): - assert set(sample_data.keys()) == set( - [ - "content", - "skipped_content", - "directory", - "revision", - "release", - "snapshot", - "origin", - "origin_visit", - "fetcher", - "authority", - "origin_metadata", - "content_metadata", - ] - ) - for object_type, objs in sample_data.items(): - for obj in objs: - assert isinstance(obj, BaseModel) - - -def test_sample_data_model(sample_data, sample_data_model): +def test_sample_data(sample_data_model): assert set(sample_data_model.keys()) == set( [ "content", @@ -50,15 +25,10 @@ "content_metadata", ] ) - for object_type, objs in sample_data_model.items(): - assert object_type in OBJECT_FACTORY - for obj in objs: assert isinstance(obj, BaseModel) - assert len(objs) == len(sample_data[object_type]) - def test_swh_storage(swh_storage: StorageInterface): assert isinstance(swh_storage, StorageInterface) is not None diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -35,7 +35,7 @@ @pytest.fixture -def fake_hash_collision(sample_data): +def fake_hash_collision(sample_data_model): return HashCollision("sha1", "38762cf7f55934b34d179ae6a4c80cadccbb7f0a", []) diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -469,7 +469,7 @@ ) def test_content_missing(self, swh_storage, algos): algos |= {"sha1"} - content, missing_content = [data.content2, data.missing_content] + content, missing_content = [data.content2, data.skipped_content] swh_storage.content_add([content]) test_contents = [content.to_dict()] @@ -499,7 +499,7 @@ ) def test_content_missing_unknown_algo(self, swh_storage, algos): algos |= {"sha1"} - content, missing_content = [data.content2, data.missing_content] + content, missing_content = [data.content2, data.skipped_content] swh_storage.content_add([content]) test_contents = [content.to_dict()] @@ -3161,9 +3161,8 @@ MetadataTargetType.CONTENT, content_swhid, authority ) assert result["next_page_token"] is None - assert ( - list(sorted(result["results"], key=lambda x: x.discovery_date,)) - == content_metadata + assert list(sorted(result["results"], key=lambda x: x.discovery_date,)) == list( + content_metadata ) def test_content_metadata_add_duplicate(self, swh_storage, sample_data_model):