diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.3 -swh.model >= 0.7.2 +swh.model >= 1.0.0 swh.objstorage >= 0.2.2 swh.scheduler >= 0.4.0 swh.storage >= 0.13.1 diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -359,9 +359,7 @@ status, extra={ "swh_task_args": [], - "swh_task_kwargs": { - "origin": self.origin.url - }, + "swh_task_kwargs": {"origin": self.origin.url}, }, ) visit_status = OriginVisitStatus( diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -16,12 +16,17 @@ from swh.loader.package.tests.common import check_metadata_paths from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.identifiers import ( + CoreSWHID, + ExtendedObjectType, + ExtendedSWHID, + ObjectType, +) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, + Origin, RawExtrinsicMetadata, Snapshot, SnapshotBranch, @@ -233,7 +238,7 @@ # Check origin metadata orig_meta = loader.storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, url, authority + Origin(url).swhid(), authority ) assert orig_meta.next_page_token is None raw_meta = loader.client.metadata_get(deposit_id) @@ -245,10 +250,11 @@ assert orig_meta0.fetcher == fetcher # Check directory metadata - directory_id = hash_to_hex(revision.directory) - directory_swhid = SWHID(object_type="directory", object_id=directory_id) + directory_swhid = CoreSWHID( + object_type=ObjectType.DIRECTORY, object_id=revision.directory + ) actual_dir_meta = loader.storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, directory_swhid, authority + directory_swhid, authority ) assert actual_dir_meta.next_page_token is None assert len(actual_dir_meta.results) == len(all_metadata_raw) @@ -366,20 +372,21 @@ # Check the origin metadata swh side origin_extrinsic_metadata = loader.storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, url, authority + Origin(url).swhid(), authority ) assert origin_extrinsic_metadata.next_page_token is None all_metadata_raw: List[str] = raw_meta["metadata_raw"] # 1 raw metadata xml + 1 json dict assert len(origin_extrinsic_metadata.results) == len(all_metadata_raw) + 1 + origin_swhid = Origin(url).swhid() + expected_metadata = [] for idx, raw_meta in enumerate(all_metadata_raw): origin_meta = origin_extrinsic_metadata.results[idx] expected_metadata.append( RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=url, + target=origin_swhid, discovery_date=origin_meta.discovery_date, metadata=raw_meta.encode(), format="sword-v2-atom-codemeta-v2", @@ -395,8 +402,7 @@ } expected_metadata.append( RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=url, + target=origin_swhid, discovery_date=origin_extrinsic_metadata.results[-1].discovery_date, metadata=json.dumps(origin_metadata).encode(), format="original-artifacts-json", @@ -410,18 +416,20 @@ assert orig_meta in expected_metadata # Check the revision metadata swh side - directory_id = hash_to_hex(revision.directory) - directory_swhid = SWHID(object_type="directory", object_id=directory_id) + directory_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory + ) actual_directory_metadata = loader.storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, directory_swhid, authority + directory_swhid, authority ) assert actual_directory_metadata.next_page_token is None assert len(actual_directory_metadata.results) == len(all_metadata_raw) - revision_swhid = SWHID(object_type="revision", object_id=revision_id) + revision_swhid = CoreSWHID( + object_type=ObjectType.REVISION, object_id=hash_to_bytes(revision_id) + ) dir_metadata_template = RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=directory_swhid, format="sword-v2-atom-codemeta-v2", authority=authority, diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -34,12 +34,16 @@ from swh.model import from_disk from swh.model.collections import ImmutableDict from swh.model.hashutil import hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.identifiers import ( + CoreSWHID, + ExtendedObjectType, + ExtendedSWHID, + ObjectType, +) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -596,15 +600,16 @@ revision = attr.evolve(revision, metadata=ImmutableDict(full_metadata)) original_artifact_metadata = RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, - target=SWHID(object_type="directory", object_id=revision.directory), + target=ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory + ), discovery_date=self.visit_date, authority=SWH_METADATA_AUTHORITY, fetcher=self.get_metadata_fetcher(), format="original-artifacts-json", metadata=json.dumps(metadata).encode(), origin=self.url, - revision=SWHID(object_type="revision", object_id=revision.id), + revision=CoreSWHID(object_type=ObjectType.REVISION, object_id=revision.id), ) self._load_metadata_objects([original_artifact_metadata]) @@ -718,8 +723,7 @@ for item in metadata_items: metadata_objects.append( RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=self.url, + target=Origin(self.url).swhid(), discovery_date=item.discovery_date or self.visit_date, authority=authority, fetcher=fetcher, @@ -753,8 +757,9 @@ for item in metadata_items: metadata_objects.append( RawExtrinsicMetadata( - type=MetadataTargetType.SNAPSHOT, - target=SWHID(object_type="snapshot", object_id=snapshot_id), + target=ExtendedSWHID( + object_type=ExtendedObjectType.SNAPSHOT, object_id=snapshot_id + ), discovery_date=item.discovery_date or self.visit_date, authority=authority, fetcher=fetcher, @@ -782,16 +787,17 @@ for item in p_info.directory_extrinsic_metadata: metadata_objects.append( RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, - target=SWHID(object_type="directory", object_id=directory_id), + target=ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=directory_id + ), discovery_date=item.discovery_date or self.visit_date, authority=authority, fetcher=fetcher, format=item.format, metadata=item.metadata, origin=self.url, - revision=SWHID( - object_type="revision", object_id=hash_to_hex(revision_id) + revision=CoreSWHID( + object_type=ObjectType.REVISION, object_id=revision_id ), ) ) diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -27,12 +27,11 @@ from swh.loader.tests import check_snapshot as check_snapshot_full from swh.loader.tests import get_stats from swh.model.hashutil import hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.identifiers import ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, RawExtrinsicMetadata, Snapshot, SnapshotBranch, @@ -302,15 +301,14 @@ ) visit_status = origin_get_latest_visit_status(swh_storage, sources_url) - snapshot_swhid = SWHID( - object_type="snapshot", object_id=hash_to_hex(visit_status.snapshot) + snapshot_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.SNAPSHOT, object_id=visit_status.snapshot ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url=sources_url, ) expected_metadata = [ RawExtrinsicMetadata( - type=MetadataTargetType.SNAPSHOT, target=snapshot_swhid, authority=metadata_authority, fetcher=MetadataFetcher( @@ -324,7 +322,7 @@ ) ] assert swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.SNAPSHOT, snapshot_swhid, metadata_authority, + snapshot_swhid, metadata_authority, ) == PagedResult(next_page_token=None, results=expected_metadata,) diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py --- a/swh/loader/package/npm/tests/test_npm.py +++ b/swh/loader/package/npm/tests/test_npm.py @@ -18,12 +18,16 @@ from swh.loader.package.tests.common import check_metadata_paths from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes -from swh.model.identifiers import SWHID +from swh.model.identifiers import ( + CoreSWHID, + ExtendedObjectType, + ExtendedSWHID, + ObjectType, +) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Person, RawExtrinsicMetadata, Snapshot, @@ -405,11 +409,14 @@ for (version_name, revision_id) in versions: revision = swh_storage.revision_get([hash_to_bytes(revision_id)])[0] directory_id = revision.directory - directory_swhid = SWHID(object_type="directory", object_id=directory_id,) - revision_swhid = SWHID(object_type="revision", object_id=revision_id,) + directory_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=directory_id, + ) + revision_swhid = CoreSWHID( + object_type=ObjectType.REVISION, object_id=hash_to_bytes(revision_id), + ) expected_metadata = [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=directory_swhid, authority=metadata_authority, fetcher=MetadataFetcher( @@ -425,7 +432,7 @@ ) ] assert swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, directory_swhid, metadata_authority, + directory_swhid, metadata_authority, ) == PagedResult(next_page_token=None, results=expected_metadata,) diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -22,13 +22,17 @@ ) from swh.loader.package.tests.common import check_metadata_paths from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats -from swh.model.hashutil import hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.hashutil import hash_to_bytes +from swh.model.identifiers import ( + CoreSWHID, + ExtendedObjectType, + ExtendedSWHID, + ObjectType, +) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Person, RawExtrinsicMetadata, Snapshot, @@ -349,18 +353,17 @@ paths=[("filename", str), ("length", int), ("checksums", dict),], ) - revision_swhid = SWHID( - object_type="revision", object_id=hash_to_hex(expected_revision_id) + revision_swhid = CoreSWHID( + object_type=ObjectType.REVISION, object_id=expected_revision_id ) - directory_swhid = SWHID( - object_type="directory", object_id=hash_to_hex(revision.directory) + directory_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://pypi.org/", ) expected_metadata = [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=directory_swhid, authority=metadata_authority, fetcher=MetadataFetcher( @@ -376,7 +379,7 @@ ) ] assert swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, directory_swhid, metadata_authority, + directory_swhid, metadata_authority, ) == PagedResult(next_page_token=None, results=expected_metadata,) diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py --- a/swh/loader/package/tests/test_loader_metadata.py +++ b/swh/loader/package/tests/test_loader_metadata.py @@ -15,12 +15,17 @@ RawExtrinsicMetadataCore, ) from swh.model.hashutil import hash_to_bytes -from swh.model.identifiers import SWHID +from swh.model.identifiers import ( + CoreSWHID, + ExtendedObjectType, + ExtendedSWHID, + ObjectType, +) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, + Origin, Person, RawExtrinsicMetadata, Revision, @@ -35,11 +40,14 @@ type=MetadataAuthorityType.FORGE, url="http://example.org/", ) ORIGIN_URL = "http://example.org/archive.tgz" +ORIGIN_SWHID = Origin(ORIGIN_URL).swhid() REVISION_ID = hash_to_bytes("8ff44f081d43176474b267de5451f2c2e88089d0") -REVISION_SWHID = SWHID(object_type="revision", object_id=REVISION_ID) +REVISION_SWHID = CoreSWHID(object_type=ObjectType.REVISION, object_id=REVISION_ID) DIRECTORY_ID = hash_to_bytes("aa" * 20) -DIRECTORY_SWHID = SWHID(object_type="directory", object_id=DIRECTORY_ID) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) FETCHER = MetadataFetcher( @@ -51,7 +59,6 @@ DIRECTORY_METADATA = [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=DISCOVERY_DATE, authority=AUTHORITY, @@ -62,7 +69,6 @@ revision=REVISION_SWHID, ), RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=DISCOVERY_DATE + datetime.timedelta(seconds=1), authority=AUTHORITY, @@ -76,8 +82,7 @@ ORIGIN_METADATA = [ RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=ORIGIN_URL, + target=ORIGIN_SWHID, discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), authority=AUTHORITY, fetcher=FETCHER, @@ -150,13 +155,10 @@ type=MetadataAuthorityType.REGISTRY, url="https://softwareheritage.org/", ) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority, - ) + result = swh_storage.raw_extrinsic_metadata_get(DIRECTORY_SWHID, authority,) assert result.next_page_token is None assert len(result.results) == 1 assert result.results[0] == RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=result.results[0].discovery_date, authority=authority, @@ -177,15 +179,11 @@ "snapshot_id": FULL_SNAPSHOT_ID, } - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, AUTHORITY, - ) + result = swh_storage.raw_extrinsic_metadata_get(DIRECTORY_SWHID, AUTHORITY,) assert result.next_page_token is None assert result.results == DIRECTORY_METADATA - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, ORIGIN_URL, AUTHORITY, - ) + result = swh_storage.raw_extrinsic_metadata_get(ORIGIN_SWHID, AUTHORITY,) assert result.next_page_token is None assert result.results == ORIGIN_METADATA @@ -201,9 +199,7 @@ "snapshot_id": FULL_SNAPSHOT_ID, } - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, AUTHORITY, - ) + result = swh_storage.raw_extrinsic_metadata_get(DIRECTORY_SWHID, AUTHORITY,) assert result.next_page_token is None assert result.results == DIRECTORY_METADATA @@ -219,9 +215,7 @@ "snapshot_id": FULL_SNAPSHOT_ID, } - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, AUTHORITY, - ) + result = swh_storage.raw_extrinsic_metadata_get(DIRECTORY_SWHID, AUTHORITY,) assert result.next_page_token is None assert result.results == DIRECTORY_METADATA diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py --- a/swh/loader/tests/test_cli.py +++ b/swh/loader/tests/test_cli.py @@ -75,8 +75,9 @@ with pytest.raises(ValueError, match="Missing storage"): runner.invoke( - loader_cli, ["-C", conf_path, "run", "pypi", "url=https://some-url",], - catch_exceptions=False + loader_cli, + ["-C", conf_path, "run", "pypi", "url=https://some-url",], + catch_exceptions=False, )