Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8394558
D3667.id12922.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Subscribers
None
D3667.id12922.diff
View Options
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -209,11 +209,9 @@
orig_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
- assert orig_meta is not None
- assert isinstance(orig_meta, dict)
- assert len(orig_meta["results"]) == 1
- assert orig_meta["next_page_token"] is None
- orig_meta0 = orig_meta["results"][0]
+ assert orig_meta.next_page_token is None
+ assert len(orig_meta.results) == 1
+ orig_meta0 = orig_meta.results[0]
assert orig_meta0.authority == authority
assert orig_meta0.fetcher == fetcher
@@ -222,11 +220,9 @@
rev_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, revision_swhid, authority
)
- assert rev_meta is not None
- assert isinstance(rev_meta, dict)
- assert len(rev_meta["results"]) == 1
- assert rev_meta["next_page_token"] is None
- rev_meta0 = rev_meta["results"][0]
+ assert rev_meta.next_page_token is None
+ assert len(rev_meta.results) == 1
+ rev_meta0 = rev_meta.results[0]
assert rev_meta0.authority == authority
assert rev_meta0.fetcher == fetcher
@@ -345,14 +341,10 @@
orig_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
- assert orig_meta is not None
- assert isinstance(orig_meta, dict)
- assert len(orig_meta["results"]) == 1
- assert orig_meta["next_page_token"] is None
+ assert orig_meta.next_page_token is None
+ assert len(orig_meta.results) == 1
- assert len(orig_meta["results"]) == 1
-
- orig_meta0 = orig_meta["results"][0]
+ orig_meta0 = orig_meta.results[0]
expected_metadata = RawExtrinsicMetadata(
type=MetadataTargetType.ORIGIN,
@@ -380,14 +372,12 @@
rev_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, revision_swhid, authority
)
- assert rev_meta is not None
- assert isinstance(rev_meta, dict)
- assert len(rev_meta["results"]) == 1
- assert rev_meta["next_page_token"] is None
- assert len(rev_meta["results"]) == 1
+ assert rev_meta.next_page_token is None
+
+ assert len(rev_meta.results) == 1
- rev_meta0 = rev_meta["results"][0]
+ rev_meta0 = rev_meta.results[0]
assert rev_meta0 == attr.evolve(
expected_metadata,
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -449,6 +449,18 @@
status_visit = "partial"
status_load = "failed"
+ if snapshot:
+ try:
+ metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id)
+ self._load_metadata_objects(metadata_objects)
+ except Exception as e:
+ logger.exception(
+ "Failed to load extrinsic snapshot metadata for %s", self.url
+ )
+ sentry_sdk.capture_exception(e)
+ status_visit = "partial"
+ status_load = "failed"
+
try:
metadata_objects = self.build_extrinsic_origin_metadata()
self._load_metadata_objects(metadata_objects)
@@ -638,6 +650,42 @@
return metadata_objects
+ def get_extrinsic_snapshot_metadata(self) -> List[RawExtrinsicMetadataCore]:
+ """Returns metadata items, used by build_extrinsic_snapshot_metadata."""
+ return []
+
+ def build_extrinsic_snapshot_metadata(
+ self, snapshot_id: Sha1Git
+ ) -> List[RawExtrinsicMetadata]:
+ """Builds a list of full RawExtrinsicMetadata objects, using
+ metadata returned by get_extrinsic_snapshot_metadata."""
+ metadata_items = self.get_extrinsic_snapshot_metadata()
+ if not metadata_items:
+ # If this package loader doesn't write metadata, no need to require
+ # an implementation for get_metadata_authority.
+ return []
+
+ authority = self.get_metadata_authority()
+ fetcher = self.get_metadata_fetcher()
+
+ metadata_objects = []
+
+ for item in metadata_items:
+ metadata_objects.append(
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.SNAPSHOT,
+ id=SWHID(object_type="snapshot", object_id=snapshot_id),
+ discovery_date=item.discovery_date or self.visit_date,
+ authority=authority,
+ fetcher=fetcher,
+ format=item.format,
+ metadata=item.metadata,
+ origin=self.url,
+ )
+ )
+
+ return metadata_objects
+
def build_extrinsic_revision_metadata(
self, p_info: TPackageInfo, revision_id: Sha1Git
) -> List[RawExtrinsicMetadata]:
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -12,6 +12,8 @@
from swh.model import hashutil
from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
Revision,
RevisionType,
TargetType,
@@ -21,7 +23,11 @@
)
from swh.loader.package.utils import EMPTY_AUTHOR
-from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.loader import (
+ BasePackageInfo,
+ PackageLoader,
+ RawExtrinsicMetadataCore,
+)
logger = logging.getLogger(__name__)
@@ -55,8 +61,8 @@
def __init__(self, url):
super().__init__(url=url)
- raw = retrieve_sources(url)
- clean = clean_sources(raw)
+ self.raw_sources = retrieve_sources(url)
+ clean = clean_sources(parse_sources(self.raw_sources))
self.sources = clean["sources"]
self.provider_url = url
@@ -76,6 +82,20 @@
"""
return self._integrityByUrl.keys()
+ def get_metadata_authority(self):
+ return MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url=self.url, metadata={},
+ )
+
+ def get_extrinsic_snapshot_metadata(self):
+ return [
+ RawExtrinsicMetadataCore(
+ format="nixguix-sources-json",
+ metadata=self.raw_sources,
+ discovery_date=None,
+ ),
+ ]
+
# Note: this could be renamed get_artifact_info in the PackageLoader
# base class.
def get_package_info(self, url) -> Iterator[Tuple[str, NixGuixPackageInfo]]:
@@ -186,12 +206,16 @@
)
-def retrieve_sources(url: str) -> Dict[str, Any]:
+def retrieve_sources(url: str) -> bytes:
response = requests.get(url, allow_redirects=True)
if response.status_code != 200:
raise ValueError("Got %d HTTP code on %s", response.status_code, url)
- return json.loads(response.content.decode("utf-8"))
+ return response.content
+
+
+def parse_sources(raw_sources: bytes) -> Dict[str, Any]:
+ return json.loads(raw_sources.decode("utf-8"))
def clean_sources(sources: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -15,11 +15,22 @@
from unittest.mock import patch
-from swh.model.model import Snapshot, SnapshotBranch, TargetType
+from swh.model.identifiers import SWHID
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+)
from swh.loader.package.archive.loader import ArchiveLoader
from swh.loader.package.nixguix.loader import (
NixGuixPackageInfo,
NixGuixLoader,
+ parse_sources,
retrieve_sources,
clean_sources,
)
@@ -27,6 +38,10 @@
from swh.loader.package.utils import download
from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.storage.exc import HashCollision
+from swh.storage.algos.origin import origin_get_latest_visit_status
+from swh.storage.interface import PagedResult
+
+from swh.loader.package import __version__
from swh.loader.tests import (
assert_last_visit_matches,
@@ -38,6 +53,17 @@
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json"
+@pytest.fixture
+def raw_sources(datadir) -> bytes:
+ with open(
+ os.path.join(
+ datadir, "https_nix-community.github.io", "nixpkgs-swh_sources.json"
+ ),
+ "rb",
+ ) as f:
+ return f.read()
+
+
SNAPSHOT1 = Snapshot(
id=hash_to_bytes("0c5881c74283793ebe9a09a105a9381e41380383"),
branches={
@@ -80,7 +106,7 @@
def test_retrieve_sources(swh_config, requests_mock_datadir):
- j = retrieve_sources(sources_url)
+ j = parse_sources(retrieve_sources(sources_url))
assert "sources" in j.keys()
assert len(j["sources"]) == 2
@@ -136,7 +162,7 @@
assert len(clean["sources"]) == 1
-def test_loader_one_visit(swh_config, requests_mock_datadir):
+def test_loader_one_visit(swh_config, requests_mock_datadir, raw_sources):
loader = NixGuixLoader(sources_url)
res = loader.load()
assert res["status"] == "eventful"
@@ -160,6 +186,34 @@
loader.storage, sources_url, status="partial", type="nixguix"
)
+ (_, visit_status) = origin_get_latest_visit_status(loader.storage, sources_url)
+ snapshot_swhid = SWHID(
+ object_type="snapshot", object_id=hash_to_hex(visit_status.snapshot)
+ )
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url=sources_url,
+ )
+ expected_metadata = [
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ fetcher=MetadataFetcher(
+ name="swh.loader.package.nixguix.loader.NixGuixLoader",
+ version=__version__,
+ ),
+ discovery_date=loader.visit_date,
+ format="nixguix-sources-json",
+ metadata=raw_sources,
+ origin=sources_url,
+ )
+ ]
+ assert loader.storage.raw_extrinsic_metadata_get(
+ type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ ) == PagedResult(next_page_token=None, results=expected_metadata,)
+
def test_uncompress_failure(swh_config, requests_mock_datadir):
"""Non tarball files are currently not supported and the uncompress
@@ -305,7 +359,7 @@
} == stats
-def test_resolve_revision_from(swh_config, requests_mock_datadir):
+def test_resolve_revision_from(swh_config, requests_mock_datadir, datadir):
loader = NixGuixLoader(sources_url)
known_artifacts = {
diff --git a/swh/loader/package/nixguix/tests/test_tasks.py b/swh/loader/package/nixguix/tests/test_tasks.py
--- a/swh/loader/package/nixguix/tests/test_tasks.py
+++ b/swh/loader/package/nixguix/tests/test_tasks.py
@@ -3,6 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import json
+
def test_nixguix_loader(
mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
@@ -13,11 +15,9 @@
mock_retrieve_sources = mocker.patch(
"swh.loader.package.nixguix.loader.retrieve_sources"
)
- mock_retrieve_sources.return_value = {
- "version": 1,
- "sources": [],
- "revision": "some-revision",
- }
+ mock_retrieve_sources.return_value = json.dumps(
+ {"version": 1, "sources": [], "revision": "some-revision",}
+ ).encode()
res = swh_scheduler_celery_app.send_task(
"swh.loader.package.nixguix.tasks.LoadNixguix", kwargs=dict(url="some-url")
diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py
--- a/swh/loader/package/tests/test_loader_metadata.py
+++ b/swh/loader/package/tests/test_loader_metadata.py
@@ -124,14 +124,14 @@
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == REVISION_METADATA
+ assert result.next_page_token is None
+ assert result.results == REVISION_METADATA
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.ORIGIN, ORIGIN_URL, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == ORIGIN_METADATA
+ assert result.next_page_token is None
+ assert result.results == ORIGIN_METADATA
assert caplog.text == ""
@@ -154,8 +154,8 @@
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == REVISION_METADATA
+ assert result.next_page_token is None
+ assert result.results == REVISION_METADATA
assert caplog.text == ""
@@ -178,7 +178,7 @@
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == REVISION_METADATA
+ assert result.next_page_token is None
+ assert result.results == REVISION_METADATA
assert caplog.text == ""
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
--- a/swh/loader/tests/__init__.py
+++ b/swh/loader/tests/__init__.py
@@ -59,6 +59,7 @@
f"Visit_status points to snapshot {visit_status.snapshot.hex()} "
f"instead of {snapshot.hex()}"
)
+
return visit_status
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Jun 3, 7:26 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234319
Attached To
D3667: nixguix: Load the content of sources.json as extrinsic snapshot metadata.
Event Timeline
Log In to Comment