# Patch summary (text before the first "diff --git" header is ignored by patch tools).
#
# * swh/loader/package/deposit/loader.py
#     - serialises metadata["origin_metadata"]["metadata"] to JSON bytes and passes
#       it as revision_extrinsic_metadata, with a None discovery date and the
#       format "sword-v2-atom-codemeta-v2-in-json";
#     - extracts the authority / fetcher construction into
#       get_metadata_authority() and get_metadata_fetcher().
# * swh/loader/package/deposit/tests/test_deposit.py
#     - checks the revision-level metadata next to the origin-level metadata.
# * swh/loader/package/loader.py
#     - allows a None discovery date in revision_extrinsic_metadata tuples and
#       falls back to self.visit_date when it is None.
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy.  Line
# breaks, indentation depth and blank context lines were reconstructed so that
# every hunk matches its "@@" line counts -- verify against the target tree.
# NOTE(review): the "# See" context line in swh/loader/package/loader.py looks
# truncated (presumably a stripped <URL>); restore it or that hunk may not apply.
# Changes relative to the recovered text:
#   - revision_extrinsic_metadata uses factory=list instead of the shared
#     mutable default=[];
#   - a duplicated 'assert len(rev_meta["results"]) == 1' (and its blank line)
#     was dropped from the second test, so that hunk is now "+373,27".
diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -61,6 +61,9 @@
         # which computes itself the values. The loader needs to use those to create the
         # revision.
 
+        raw_metadata_from_origin = json.dumps(
+            metadata["origin_metadata"]["metadata"]
+        ).encode()
         metadata = metadata.copy()
         # FIXME: this removes information from 'raw' metadata
         depo = metadata.pop("deposit")
@@ -77,6 +80,9 @@
             committer=parse_author(depo["committer"]),
             revision_parents=tuple(hash_to_bytes(p) for p in depo["revision_parents"]),
             raw_info=metadata,
+            revision_extrinsic_metadata=[
+                (None, "sword-v2-atom-codemeta-v2-in-json", raw_metadata_from_origin,),
+            ],
         )
 
 
@@ -107,6 +113,24 @@
         # branch
         return ["HEAD"]
 
+    def get_metadata_authority(self) -> MetadataAuthority:
+        provider = self.metadata["origin_metadata"]["provider"]
+        assert provider["provider_type"] == "deposit_client"
+        return MetadataAuthority(
+            type=MetadataAuthorityType.DEPOSIT_CLIENT,
+            url=provider["provider_url"],
+            metadata={
+                "name": provider["provider_name"],
+                **(provider["metadata"] or {}),
+            },
+        )
+
+    def get_metadata_fetcher(self) -> MetadataFetcher:
+        tool = self.metadata["origin_metadata"]["tool"]
+        return MetadataFetcher(
+            name=tool["name"], version=tool["version"], metadata=tool["configuration"],
+        )
+
     def get_package_info(
         self, version: str
     ) -> Iterator[Tuple[str, DepositPackageInfo]]:
@@ -165,24 +189,10 @@
             origin_metadata = self.metadata["origin_metadata"]
             logger.debug("origin_metadata: %s", origin_metadata)
 
-            provider = origin_metadata["provider"]
-            assert provider["provider_type"] == "deposit_client"
-            authority = MetadataAuthority(
-                type=MetadataAuthorityType.DEPOSIT_CLIENT,
-                url=provider["provider_url"],
-                metadata={
-                    "name": provider["provider_name"],
-                    **(provider["metadata"] or {}),
-                },
-            )
+            authority = self.get_metadata_authority()
             self.storage.metadata_authority_add([authority])
 
-            tool = origin_metadata["tool"]
-            fetcher = MetadataFetcher(
-                name=tool["name"],
-                version=tool["version"],
-                metadata=tool["configuration"],
-            )
+            fetcher = self.get_metadata_fetcher()
             self.storage.metadata_fetcher_add([fetcher])
 
             self.storage.object_metadata_add(
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -6,9 +6,11 @@
 import json
 import re
 
+import attr
 import pytest
 
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import SWHID
 from swh.model.model import (
     Snapshot,
     SnapshotBranch,
@@ -203,6 +205,7 @@
         url="https://hal-test.archives-ouvertes.fr/",
     )
 
+    # Check origin metadata
     orig_meta = loader.storage.object_metadata_get(
         MetadataTargetType.ORIGIN, url, authority
     )
@@ -211,10 +214,22 @@
     assert len(orig_meta["results"]) == 1
     assert orig_meta["next_page_token"] is None
     orig_meta0 = orig_meta["results"][0]
-
     assert orig_meta0.authority == authority
     assert orig_meta0.fetcher == fetcher
 
+    # Check revision metadata
+    revision_swhid = SWHID(object_type="revision", object_id=revision_id)
+    rev_meta = loader.storage.object_metadata_get(
+        MetadataTargetType.REVISION, revision_swhid, authority
+    )
+    assert rev_meta is not None
+    assert isinstance(rev_meta, dict)
+    assert len(rev_meta["results"]) == 1
+    assert rev_meta["next_page_token"] is None
+    rev_meta0 = rev_meta["results"][0]
+    assert rev_meta0.authority == authority
+    assert rev_meta0.fetcher == fetcher
+
     # Retrieve the information for deposit status update query to the deposit
     urls = [
         m
@@ -326,7 +341,7 @@
         url="https://hal-test.archives-ouvertes.fr/",
     )
 
-    # Check the metadata swh side
+    # Check the origin metadata swh side
     orig_meta = loader.storage.object_metadata_get(
         MetadataTargetType.ORIGIN, url, authority
     )
@@ -339,9 +354,9 @@
 
     orig_meta0 = orig_meta["results"][0]
 
-    assert orig_meta0 == RawExtrinsicMetadata(
+    expected_metadata = RawExtrinsicMetadata(
         type=MetadataTargetType.ORIGIN,
-        id="https://hal-test.archives-ouvertes.fr/some-external-id",
+        id=url,
         discovery_date=orig_meta0.discovery_date,
         metadata=json.dumps(
             {
@@ -358,6 +373,27 @@
         fetcher=fetcher,
     )
 
+    assert orig_meta0 == expected_metadata
+
+    # Check the revision metadata swh side
+    revision_swhid = SWHID(object_type="revision", object_id=revision_id)
+    rev_meta = loader.storage.object_metadata_get(
+        MetadataTargetType.REVISION, revision_swhid, authority
+    )
+    assert rev_meta is not None
+    assert isinstance(rev_meta, dict)
+    assert len(rev_meta["results"]) == 1
+    assert rev_meta["next_page_token"] is None
+
+    rev_meta0 = rev_meta["results"][0]
+
+    assert rev_meta0 == attr.evolve(
+        expected_metadata,
+        type=MetadataTargetType.REVISION,
+        id=revision_swhid,
+        origin=url,
+    )
+
     # Retrieve the information for deposit status update query to the deposit
     urls = [
         m
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -77,10 +77,12 @@
     # See
 
     revision_extrinsic_metadata = attr.ib(
-        type=List[Tuple[datetime.datetime, str, bytes]], default=[], kw_only=True,
+        type=List[Tuple[Optional[datetime.datetime], str, bytes]],
+        factory=list,
+        kw_only=True,
     )
-    """Tuple elements are respectively the 'discovery_date', 'format',
-    and 'metadata' fields of RawExtrinsicMetadata"""
+    """Tuple elements are respectively the 'discovery_date' (which defaults to the
+    visit date), 'format', and 'metadata' fields of RawExtrinsicMetadata"""
 
     # TODO: add support for metadata for origins, directories, and contents
 
@@ -598,7 +600,7 @@
                 RawExtrinsicMetadata(
                     type=MetadataTargetType.REVISION,
                     id=SWHID(object_type="revision", object_id=revision_id),
-                    discovery_date=discovery_date,
+                    discovery_date=discovery_date or self.visit_date,
                     authority=authority,
                     fetcher=fetcher,
                     format=format,