Page Menu | Home | Software Heritage

D3614.diff
No One | Temporary

D3614.diff

diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -61,6 +61,9 @@
# which computes itself the values. The loader needs to use those to create the
# revision.
+ raw_metadata_from_origin = json.dumps(
+ metadata["origin_metadata"]["metadata"]
+ ).encode()
metadata = metadata.copy()
# FIXME: this removes information from 'raw' metadata
depo = metadata.pop("deposit")
@@ -77,6 +80,9 @@
committer=parse_author(depo["committer"]),
revision_parents=tuple(hash_to_bytes(p) for p in depo["revision_parents"]),
raw_info=metadata,
+ revision_extrinsic_metadata=[
+ (None, "sword-v2-atom-codemeta-v2-in-json", raw_metadata_from_origin,),
+ ],
)
@@ -107,6 +113,24 @@
# branch
return ["HEAD"]
+ def get_metadata_authority(self) -> MetadataAuthority:
+ provider = self.metadata["origin_metadata"]["provider"]
+ assert provider["provider_type"] == "deposit_client"
+ return MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=provider["provider_url"],
+ metadata={
+ "name": provider["provider_name"],
+ **(provider["metadata"] or {}),
+ },
+ )
+
+ def get_metadata_fetcher(self) -> MetadataFetcher:
+ tool = self.metadata["origin_metadata"]["tool"]
+ return MetadataFetcher(
+ name=tool["name"], version=tool["version"], metadata=tool["configuration"],
+ )
+
def get_package_info(
self, version: str
) -> Iterator[Tuple[str, DepositPackageInfo]]:
@@ -165,24 +189,10 @@
origin_metadata = self.metadata["origin_metadata"]
logger.debug("origin_metadata: %s", origin_metadata)
- provider = origin_metadata["provider"]
- assert provider["provider_type"] == "deposit_client"
- authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=provider["provider_url"],
- metadata={
- "name": provider["provider_name"],
- **(provider["metadata"] or {}),
- },
- )
+ authority = self.get_metadata_authority()
self.storage.metadata_authority_add([authority])
- tool = origin_metadata["tool"]
- fetcher = MetadataFetcher(
- name=tool["name"],
- version=tool["version"],
- metadata=tool["configuration"],
- )
+ fetcher = self.get_metadata_fetcher()
self.storage.metadata_fetcher_add([fetcher])
self.storage.object_metadata_add(
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -6,9 +6,11 @@
import json
import re
+import attr
import pytest
from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import SWHID
from swh.model.model import (
Snapshot,
SnapshotBranch,
@@ -203,6 +205,7 @@
url="https://hal-test.archives-ouvertes.fr/",
)
+ # Check origin metadata
orig_meta = loader.storage.object_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
@@ -211,10 +214,22 @@
assert len(orig_meta["results"]) == 1
assert orig_meta["next_page_token"] is None
orig_meta0 = orig_meta["results"][0]
-
assert orig_meta0.authority == authority
assert orig_meta0.fetcher == fetcher
+ # Check revision metadata
+ revision_swhid = SWHID(object_type="revision", object_id=revision_id)
+ rev_meta = loader.storage.object_metadata_get(
+ MetadataTargetType.REVISION, revision_swhid, authority
+ )
+ assert rev_meta is not None
+ assert isinstance(rev_meta, dict)
+ assert len(rev_meta["results"]) == 1
+ assert rev_meta["next_page_token"] is None
+ rev_meta0 = rev_meta["results"][0]
+ assert rev_meta0.authority == authority
+ assert rev_meta0.fetcher == fetcher
+
# Retrieve the information for deposit status update query to the deposit
urls = [
m
@@ -326,7 +341,7 @@
url="https://hal-test.archives-ouvertes.fr/",
)
- # Check the metadata swh side
+ # Check the origin metadata swh side
orig_meta = loader.storage.object_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
@@ -339,9 +354,9 @@
orig_meta0 = orig_meta["results"][0]
- assert orig_meta0 == RawExtrinsicMetadata(
+ expected_metadata = RawExtrinsicMetadata(
type=MetadataTargetType.ORIGIN,
- id="https://hal-test.archives-ouvertes.fr/some-external-id",
+ id=url,
discovery_date=orig_meta0.discovery_date,
metadata=json.dumps(
{
@@ -358,6 +373,29 @@
fetcher=fetcher,
)
+ assert orig_meta0 == expected_metadata
+
+ # Check the revision metadata swh side
+ revision_swhid = SWHID(object_type="revision", object_id=revision_id)
+ rev_meta = loader.storage.object_metadata_get(
+ MetadataTargetType.REVISION, revision_swhid, authority
+ )
+ assert rev_meta is not None
+ assert isinstance(rev_meta, dict)
+ assert len(rev_meta["results"]) == 1
+ assert rev_meta["next_page_token"] is None
+
+ assert len(rev_meta["results"]) == 1
+
+ rev_meta0 = rev_meta["results"][0]
+
+ assert rev_meta0 == attr.evolve(
+ expected_metadata,
+ type=MetadataTargetType.REVISION,
+ id=revision_swhid,
+ origin=url,
+ )
+
# Retrieve the information for deposit status update query to the deposit
urls = [
m
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -77,10 +77,12 @@
# See <https://github.com/python-attrs/attrs/issues/38>
revision_extrinsic_metadata = attr.ib(
- type=List[Tuple[datetime.datetime, str, bytes]], default=[], kw_only=True,
+ type=List[Tuple[Optional[datetime.datetime], str, bytes]],
+ default=[],
+ kw_only=True,
)
- """Tuple elements are respectively the 'discovery_date', 'format',
- and 'metadata' fields of RawExtrinsicMetadata"""
+ """Tuple elements are respectively the 'discovery_date' (which defaults to the
+ visit date), 'format', and 'metadata' fields of RawExtrinsicMetadata"""
# TODO: add support for metadata for origins, directories, and contents
@@ -598,7 +600,7 @@
RawExtrinsicMetadata(
type=MetadataTargetType.REVISION,
id=SWHID(object_type="revision", object_id=revision_id),
- discovery_date=discovery_date,
+ discovery_date=discovery_date or self.visit_date,
authority=authority,
fetcher=fetcher,
format=format,

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 6:16 PM (1 w, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220650

Event Timeline