Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122999
D3616.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D3616.diff
View Options
diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -22,8 +22,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
- RawExtrinsicMetadata,
)
from swh.loader.package.loader import (
BasePackageInfo,
@@ -181,6 +179,16 @@
},
)
+ def get_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadataCore]:
+ origin_metadata = self.metadata["origin_metadata"]
+ return [
+ RawExtrinsicMetadataCore(
+ format="sword-v2-atom-codemeta-v2-in-json",
+ metadata=json.dumps(origin_metadata["metadata"]).encode(),
+ discovery_date=None,
+ )
+ ]
+
def load(self) -> Dict:
# First making sure the deposit is known prior to trigger a loading
try:
@@ -192,31 +200,6 @@
r = super().load()
success = r["status"] != "failed"
- if success:
- # Update archive with metadata information
- origin_metadata = self.metadata["origin_metadata"]
- logger.debug("origin_metadata: %s", origin_metadata)
-
- authority = self.get_metadata_authority()
- self.storage.metadata_authority_add([authority])
-
- fetcher = self.get_metadata_fetcher()
- self.storage.metadata_fetcher_add([fetcher])
-
- self.storage.object_metadata_add(
- [
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- id=self.url,
- discovery_date=self.visit_date,
- authority=authority,
- fetcher=fetcher,
- format="sword-v2-atom-codemeta-v2-in-json",
- metadata=json.dumps(origin_metadata["metadata"]).encode(),
- )
- ]
- )
-
# Update deposit status
try:
if not success:
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -13,6 +13,7 @@
Dict,
Iterator,
Generic,
+ Iterable,
List,
Mapping,
Optional,
@@ -92,7 +93,7 @@
type=List[RawExtrinsicMetadataCore], default=[], kw_only=True,
)
- # TODO: add support for metadata for origins, directories, and contents
+ # TODO: add support for metadata for directories and contents
@property
def ID_KEYS(self):
@@ -438,13 +439,22 @@
snapshot = self._load_snapshot(
default_version, tmp_revisions, extra_branches
)
-
+ self.storage.flush()
except Exception as e:
logger.exception("Failed to build snapshot for origin %s", self.url)
sentry_sdk.capture_exception(e)
status_visit = "partial"
status_load = "failed"
+ try:
+ metadata_objects = self.build_extrinsic_origin_metadata()
+ self._load_metadata_objects(metadata_objects)
+ except Exception as e:
+ logger.exception("Failed to extrinsic origin metadata for %s", self.url)
+ sentry_sdk.capture_exception(e)
+ status_visit = "partial"
+ status_load = "failed"
+
return finalize_visit()
def _load_revision(self, p_info: TPackageInfo, origin) -> Optional[Sha1Git]:
@@ -590,6 +600,39 @@
"""
raise NotImplementedError("get_metadata_authority")
+ def get_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadataCore]:
+ """Returns metadata items, used by build_extrinsic_origin_metadata."""
+ return []
+
+ def build_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadata]:
+ """Builds a list of full RawExtrinsicMetadata objects, using
+ metadata returned by get_extrinsic_origin_metadata."""
+ metadata_items = self.get_extrinsic_origin_metadata()
+ if not metadata_items:
+ # If this package loader doesn't write metadata, no need to require
+ # an implementation for get_metadata_authority.
+ return []
+
+ authority = self.get_metadata_authority()
+ fetcher = self.get_metadata_fetcher()
+
+ metadata_objects = []
+
+ for item in metadata_items:
+ metadata_objects.append(
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=self.url,
+ discovery_date=item.discovery_date or self.visit_date,
+ authority=authority,
+ fetcher=fetcher,
+ format=item.format,
+ metadata=item.metadata,
+ )
+ )
+
+ return metadata_objects
+
def build_extrinsic_revision_metadata(
self, p_info: TPackageInfo, revision_id: Sha1Git
) -> List[RawExtrinsicMetadata]:
@@ -623,26 +666,31 @@
self, p_info: TPackageInfo, revision_id: Sha1Git
) -> None:
metadata_objects = self.build_extrinsic_revision_metadata(p_info, revision_id)
+ self._load_metadata_objects(metadata_objects)
+
+ def _load_metadata_objects(
+ self, metadata_objects: List[RawExtrinsicMetadata]
+ ) -> None:
+ if not metadata_objects:
+ # Nothing to write: skip registering authorities and fetchers and
+ # the object_metadata_add call entirely.
+ return
- authorities = {
- (
- metadata_object.authority.type,
- metadata_object.authority.url,
- ): metadata_object.authority
- for metadata_object in metadata_objects
+ self._create_authorities(mo.authority for mo in metadata_objects)
+ self._create_fetchers(mo.fetcher for mo in metadata_objects)
+
+ self.storage.object_metadata_add(metadata_objects)
+
+ def _create_authorities(self, authorities: Iterable[MetadataAuthority]) -> None:
+ deduplicated_authorities = {
+ (authority.type, authority.url): authority for authority in authorities
}
if authorities:
- self.storage.metadata_authority_add(authorities.values())
-
- fetchers = {
- (
- metadata_object.fetcher.name,
- metadata_object.fetcher.version,
- ): metadata_object.fetcher
- for metadata_object in metadata_objects
+ self.storage.metadata_authority_add(deduplicated_authorities.values())
+
+ def _create_fetchers(self, fetchers: Iterable[MetadataFetcher]) -> None:
+ deduplicated_fetchers = {
+ (fetcher.name, fetcher.version): fetcher for fetcher in fetchers
}
if fetchers:
- self.storage.metadata_fetcher_add(fetchers.values())
-
- if metadata_objects:
- self.storage.object_metadata_add(metadata_objects)
+ self.storage.metadata_fetcher_add(deduplicated_fetchers.values())
diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py
--- a/swh/loader/package/tests/test_loader_metadata.py
+++ b/swh/loader/package/tests/test_loader_metadata.py
@@ -4,7 +4,7 @@
# See top-level LICENSE file for more information
import datetime
-from typing import Iterator, Optional, Sequence, Tuple
+from typing import Iterator, List, Optional, Sequence, Tuple
import attr
@@ -44,7 +44,7 @@
version=__version__,
)
-METADATA = [
+REVISION_METADATA = [
RawExtrinsicMetadata(
type=MetadataTargetType.REVISION,
id=REVISION_SWHID,
@@ -67,6 +67,18 @@
),
]
+ORIGIN_METADATA = [
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=ORIGIN_URL,
+ discovery_date=datetime.datetime.now(),
+ authority=AUTHORITY,
+ fetcher=FETCHER,
+ format="test-format3",
+ metadata=b"baz qux",
+ ),
+]
+
class MetadataTestLoader(PackageLoader[BasePackageInfo]):
def get_versions(self) -> Sequence[str]:
@@ -79,8 +91,8 @@
return attr.evolve(AUTHORITY, metadata={})
def get_package_info(self, version: str) -> Iterator[Tuple[str, BasePackageInfo]]:
- m0 = METADATA[0]
- m1 = METADATA[1]
+ m0 = REVISION_METADATA[0]
+ m1 = REVISION_METADATA[1]
p_info = BasePackageInfo(
url=ORIGIN_URL,
filename="archive.tgz",
@@ -92,8 +104,12 @@
yield (version, p_info)
+ def get_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadataCore]:
+ m = ORIGIN_METADATA[0]
+ return [RawExtrinsicMetadataCore(m.format, m.metadata, m.discovery_date)]
+
-def test_load_revision_metadata(swh_config, caplog):
+def test_load_metadata(swh_config, caplog):
storage = get_storage("memory")
loader = MetadataTestLoader(ORIGIN_URL)
@@ -109,7 +125,13 @@
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
assert result["next_page_token"] is None
- assert result["results"] == METADATA
+ assert result["results"] == REVISION_METADATA
+
+ result = storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, ORIGIN_URL, AUTHORITY,
+ )
+ assert result["next_page_token"] is None
+ assert result["results"] == ORIGIN_METADATA
assert caplog.text == ""
@@ -133,7 +155,7 @@
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
assert result["next_page_token"] is None
- assert result["results"] == METADATA
+ assert result["results"] == REVISION_METADATA
assert caplog.text == ""
@@ -157,6 +179,6 @@
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
assert result["next_page_token"] is None
- assert result["results"] == METADATA
+ assert result["results"] == REVISION_METADATA
assert caplog.text == ""
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 3:19 PM (2 d, 21 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220590
Attached To
D3616: Move origin metadata loading from DepositLoader to base PackageLoader.
Event Timeline
Log In to Comment