Page MenuHomeSoftware Heritage

D3667.id12901.diff
No OneTemporary

D3667.id12901.diff

diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -449,6 +449,18 @@
status_visit = "partial"
status_load = "failed"
+ if snapshot:
+ try:
+ metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id)
+ self._load_metadata_objects(metadata_objects)
+ except Exception as e:
+ logger.exception(
+ "Failed to load extrinsic snapshot metadata for %s", self.url
+ )
+ sentry_sdk.capture_exception(e)
+ status_visit = "partial"
+ status_load = "failed"
+
try:
metadata_objects = self.build_extrinsic_origin_metadata()
self._load_metadata_objects(metadata_objects)
@@ -638,6 +650,42 @@
return metadata_objects
+ def get_extrinsic_snapshot_metadata(self) -> List[RawExtrinsicMetadataCore]:
+ """Returns metadata items, used by build_extrinsic_snapshot_metadata."""
+ return []
+
+ def build_extrinsic_snapshot_metadata(
+ self, snapshot_id: Sha1Git
+ ) -> List[RawExtrinsicMetadata]:
+ """Builds a list of full RawExtrinsicMetadata objects, using
+ metadata returned by get_extrinsic_snapshot_metadata."""
+ metadata_items = self.get_extrinsic_snapshot_metadata()
+ if not metadata_items:
+ # If this package loader doesn't write metadata, no need to require
+ # an implementation for get_metadata_authority.
+ return []
+
+ authority = self.get_metadata_authority()
+ fetcher = self.get_metadata_fetcher()
+
+ metadata_objects = []
+
+ for item in metadata_items:
+ metadata_objects.append(
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.SNAPSHOT,
+ id=SWHID(object_type="snapshot", object_id=snapshot_id),
+ discovery_date=item.discovery_date or self.visit_date,
+ authority=authority,
+ fetcher=fetcher,
+ format=item.format,
+ metadata=item.metadata,
+ origin=self.url,
+ )
+ )
+
+ return metadata_objects
+
def build_extrinsic_revision_metadata(
self, p_info: TPackageInfo, revision_id: Sha1Git
) -> List[RawExtrinsicMetadata]:
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -12,6 +12,8 @@
from swh.model import hashutil
from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
Revision,
RevisionType,
TargetType,
@@ -21,7 +23,11 @@
)
from swh.loader.package.utils import EMPTY_AUTHOR
-from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.loader import (
+ BasePackageInfo,
+ PackageLoader,
+ RawExtrinsicMetadataCore,
+)
logger = logging.getLogger(__name__)
@@ -55,8 +61,8 @@
def __init__(self, url):
super().__init__(url=url)
- raw = retrieve_sources(url)
- clean = clean_sources(raw)
+ self.raw_sources = retrieve_sources(url)
+ clean = clean_sources(parse_sources(self.raw_sources))
self.sources = clean["sources"]
self.provider_url = url
@@ -76,6 +82,20 @@
"""
return self._integrityByUrl.keys()
+ def get_metadata_authority(self):
+ return MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url=self.url, metadata={},
+ )
+
+ def get_extrinsic_snapshot_metadata(self):
+ return [
+ RawExtrinsicMetadataCore(
+ format="nixguix-sources-json",
+ metadata=self.raw_sources,
+ discovery_date=None,
+ ),
+ ]
+
# Note: this could be renamed get_artifact_info in the PackageLoader
# base class.
def get_package_info(self, url) -> Iterator[Tuple[str, NixGuixPackageInfo]]:
@@ -186,12 +206,16 @@
)
-def retrieve_sources(url: str) -> Dict[str, Any]:
+def retrieve_sources(url: str) -> bytes:
response = requests.get(url, allow_redirects=True)
if response.status_code != 200:
raise ValueError("Got %d HTTP code on %s", response.status_code, url)
- return json.loads(response.content.decode("utf-8"))
+ return response.content
+
+
+def parse_sources(raw_sources: bytes) -> Dict[str, Any]:
+ return json.loads(raw_sources.decode("utf-8"))
def clean_sources(sources: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -15,11 +15,22 @@
from unittest.mock import patch
-from swh.model.model import Snapshot, SnapshotBranch, TargetType
+from swh.model.identifiers import SWHID
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+)
from swh.loader.package.archive.loader import ArchiveLoader
from swh.loader.package.nixguix.loader import (
NixGuixPackageInfo,
NixGuixLoader,
+ parse_sources,
retrieve_sources,
clean_sources,
)
@@ -27,6 +38,9 @@
from swh.loader.package.utils import download
from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.storage.exc import HashCollision
+from swh.storage.algos.origin import origin_get_latest_visit_status
+
+from swh.loader.package import __version__
from swh.loader.tests import (
assert_last_visit_matches,
@@ -38,6 +52,17 @@
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json"
+@pytest.fixture
+def raw_sources(datadir) -> bytes:
+ with open(
+ os.path.join(
+ datadir, "https_nix-community.github.io", "nixpkgs-swh_sources.json"
+ ),
+ "rb",
+ ) as f:
+ return f.read()
+
+
SNAPSHOT1 = Snapshot(
id=hash_to_bytes("0c5881c74283793ebe9a09a105a9381e41380383"),
branches={
@@ -80,7 +105,7 @@
def test_retrieve_sources(swh_config, requests_mock_datadir):
- j = retrieve_sources(sources_url)
+ j = parse_sources(retrieve_sources(sources_url))
assert "sources" in j.keys()
assert len(j["sources"]) == 2
@@ -136,7 +161,7 @@
assert len(clean["sources"]) == 1
-def test_loader_one_visit(swh_config, requests_mock_datadir):
+def test_loader_one_visit(swh_config, requests_mock_datadir, raw_sources):
loader = NixGuixLoader(sources_url)
res = loader.load()
assert res["status"] == "eventful"
@@ -160,6 +185,35 @@
loader.storage, sources_url, status="partial", type="nixguix"
)
+ (_, visit_status) = origin_get_latest_visit_status(loader.storage, sources_url)
+ snapshot_swhid = SWHID(
+ object_type="snapshot", object_id=hash_to_hex(visit_status.snapshot)
+ )
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url=sources_url,
+ )
+ expected_metadata = [
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ fetcher=MetadataFetcher(
+ name="swh.loader.package.nixguix.loader.NixGuixLoader",
+ version=__version__,
+ ),
+ discovery_date=loader.visit_date,
+ format="nixguix-sources-json",
+ metadata=raw_sources,
+ origin=sources_url,
+ )
+ ]
+ print(loader.storage)
+ assert loader.storage.raw_extrinsic_metadata_get(
+ object_type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ ) == {"next_page_token": None, "results": expected_metadata,}
+
def test_uncompress_failure(swh_config, requests_mock_datadir):
"""Non tarball files are currently not supported and the uncompress
@@ -305,7 +359,7 @@
} == stats
-def test_resolve_revision_from(swh_config, requests_mock_datadir):
+def test_resolve_revision_from(swh_config, requests_mock_datadir, datadir):
loader = NixGuixLoader(sources_url)
known_artifacts = {
diff --git a/swh/loader/package/nixguix/tests/test_tasks.py b/swh/loader/package/nixguix/tests/test_tasks.py
--- a/swh/loader/package/nixguix/tests/test_tasks.py
+++ b/swh/loader/package/nixguix/tests/test_tasks.py
@@ -3,6 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import json
+
def test_nixguix_loader(
mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
@@ -13,11 +15,9 @@
mock_retrieve_sources = mocker.patch(
"swh.loader.package.nixguix.loader.retrieve_sources"
)
- mock_retrieve_sources.return_value = {
- "version": 1,
- "sources": [],
- "revision": "some-revision",
- }
+ mock_retrieve_sources.return_value = json.dumps(
+ {"version": 1, "sources": [], "revision": "some-revision",}
+ ).encode()
res = swh_scheduler_celery_app.send_task(
"swh.loader.package.nixguix.tasks.LoadNixguix", kwargs=dict(url="some-url")
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
--- a/swh/loader/tests/__init__.py
+++ b/swh/loader/tests/__init__.py
@@ -59,6 +59,7 @@
f"Visit_status points to snapshot {visit_status.snapshot.hex()} "
f"instead of {snapshot.hex()}"
)
+
return visit_status

File Metadata

Mime Type
text/plain
Expires
Tue, Jun 3, 7:26 PM (1 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234317

Event Timeline