Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123058
D3614.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D3614.diff
View Options
diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -61,6 +61,9 @@
# which computes itself the values. The loader needs to use those to create the
# revision.
+ raw_metadata_from_origin = json.dumps(
+ metadata["origin_metadata"]["metadata"]
+ ).encode()
metadata = metadata.copy()
# FIXME: this removes information from 'raw' metadata
depo = metadata.pop("deposit")
@@ -77,6 +80,9 @@
committer=parse_author(depo["committer"]),
revision_parents=tuple(hash_to_bytes(p) for p in depo["revision_parents"]),
raw_info=metadata,
+ revision_extrinsic_metadata=[
+ (None, "sword-v2-atom-codemeta-v2-in-json", raw_metadata_from_origin,),
+ ],
)
@@ -107,6 +113,24 @@
# branch
return ["HEAD"]
+ def get_metadata_authority(self) -> MetadataAuthority:
+ provider = self.metadata["origin_metadata"]["provider"]
+ assert provider["provider_type"] == "deposit_client"
+ return MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=provider["provider_url"],
+ metadata={
+ "name": provider["provider_name"],
+ **(provider["metadata"] or {}),
+ },
+ )
+
+ def get_metadata_fetcher(self) -> MetadataFetcher:
+ tool = self.metadata["origin_metadata"]["tool"]
+ return MetadataFetcher(
+ name=tool["name"], version=tool["version"], metadata=tool["configuration"],
+ )
+
def get_package_info(
self, version: str
) -> Iterator[Tuple[str, DepositPackageInfo]]:
@@ -165,24 +189,10 @@
origin_metadata = self.metadata["origin_metadata"]
logger.debug("origin_metadata: %s", origin_metadata)
- provider = origin_metadata["provider"]
- assert provider["provider_type"] == "deposit_client"
- authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=provider["provider_url"],
- metadata={
- "name": provider["provider_name"],
- **(provider["metadata"] or {}),
- },
- )
+ authority = self.get_metadata_authority()
self.storage.metadata_authority_add([authority])
- tool = origin_metadata["tool"]
- fetcher = MetadataFetcher(
- name=tool["name"],
- version=tool["version"],
- metadata=tool["configuration"],
- )
+ fetcher = self.get_metadata_fetcher()
self.storage.metadata_fetcher_add([fetcher])
self.storage.object_metadata_add(
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -6,9 +6,11 @@
import json
import re
+import attr
import pytest
from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import SWHID
from swh.model.model import (
Snapshot,
SnapshotBranch,
@@ -203,6 +205,7 @@
url="https://hal-test.archives-ouvertes.fr/",
)
+ # Check origin metadata
orig_meta = loader.storage.object_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
@@ -211,10 +214,22 @@
assert len(orig_meta["results"]) == 1
assert orig_meta["next_page_token"] is None
orig_meta0 = orig_meta["results"][0]
-
assert orig_meta0.authority == authority
assert orig_meta0.fetcher == fetcher
+ # Check revision metadata
+ revision_swhid = SWHID(object_type="revision", object_id=revision_id)
+ rev_meta = loader.storage.object_metadata_get(
+ MetadataTargetType.REVISION, revision_swhid, authority
+ )
+ assert rev_meta is not None
+ assert isinstance(rev_meta, dict)
+ assert len(rev_meta["results"]) == 1
+ assert rev_meta["next_page_token"] is None
+ rev_meta0 = rev_meta["results"][0]
+ assert rev_meta0.authority == authority
+ assert rev_meta0.fetcher == fetcher
+
# Retrieve the information for deposit status update query to the deposit
urls = [
m
@@ -326,7 +341,7 @@
url="https://hal-test.archives-ouvertes.fr/",
)
- # Check the metadata swh side
+ # Check the origin metadata swh side
orig_meta = loader.storage.object_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
@@ -339,9 +354,9 @@
orig_meta0 = orig_meta["results"][0]
- assert orig_meta0 == RawExtrinsicMetadata(
+ expected_metadata = RawExtrinsicMetadata(
type=MetadataTargetType.ORIGIN,
- id="https://hal-test.archives-ouvertes.fr/some-external-id",
+ id=url,
discovery_date=orig_meta0.discovery_date,
metadata=json.dumps(
{
@@ -358,6 +373,29 @@
fetcher=fetcher,
)
+ assert orig_meta0 == expected_metadata
+
+ # Check the revision metadata swh side
+ revision_swhid = SWHID(object_type="revision", object_id=revision_id)
+ rev_meta = loader.storage.object_metadata_get(
+ MetadataTargetType.REVISION, revision_swhid, authority
+ )
+ assert rev_meta is not None
+ assert isinstance(rev_meta, dict)
+ assert len(rev_meta["results"]) == 1
+ assert rev_meta["next_page_token"] is None
+
+ assert len(rev_meta["results"]) == 1
+
+ rev_meta0 = rev_meta["results"][0]
+
+ assert rev_meta0 == attr.evolve(
+ expected_metadata,
+ type=MetadataTargetType.REVISION,
+ id=revision_swhid,
+ origin=url,
+ )
+
# Retrieve the information for deposit status update query to the deposit
urls = [
m
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -77,10 +77,12 @@
# See <https://github.com/python-attrs/attrs/issues/38>
revision_extrinsic_metadata = attr.ib(
- type=List[Tuple[datetime.datetime, str, bytes]], default=[], kw_only=True,
+ type=List[Tuple[Optional[datetime.datetime], str, bytes]],
+ default=[],
+ kw_only=True,
)
- """Tuple elements are respectively the 'discovery_date', 'format',
- and 'metadata' fields of RawExtrinsicMetadata"""
+ """Tuple elements are respectively the 'discovery_date' (which defaults to the
+ visit date), 'format', and 'metadata' fields of RawExtrinsicMetadata"""
# TODO: add support for metadata for origins, directories, and contents
@@ -598,7 +600,7 @@
RawExtrinsicMetadata(
type=MetadataTargetType.REVISION,
id=SWHID(object_type="revision", object_id=revision_id),
- discovery_date=discovery_date,
+ discovery_date=discovery_date or self.visit_date,
authority=authority,
fetcher=fetcher,
format=format,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 6:16 PM (6 d, 21 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220650
Attached To
D3614: deposit loader: Write revision metadata to the metadata storage.
Event Timeline
Log In to Comment