Page MenuHomeSoftware Heritage

D8665.id31305.diff
No OneTemporary

D8665.id31305.diff

diff --git a/swh/loader/package/pubdev/loader.py b/swh/loader/package/pubdev/loader.py
--- a/swh/loader/package/pubdev/loader.py
+++ b/swh/loader/package/pubdev/loader.py
@@ -3,12 +3,16 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
-from typing import Dict, Iterator, Optional, Sequence, Tuple
+from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
import attr
from packaging.version import parse as parse_version
-from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.loader import (
+ BasePackageInfo,
+ PackageLoader,
+ RawExtrinsicMetadataCore,
+)
from swh.loader.package.utils import (
EMPTY_AUTHOR,
Person,
@@ -16,7 +20,14 @@
get_url_body,
release_name,
)
-from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ ObjectType,
+ Release,
+ Sha1Git,
+ TimestampWithTimezone,
+)
from swh.storage.interface import StorageInterface
@@ -57,7 +68,9 @@
@cached_method
def info(self) -> Dict:
- """Return the project metadata information (fetched from pub.dev registry)"""
+ """Return the project metadata information (fetched from pub.dev registry) with
+ url pattern `{PUBDEV_BASE_URL}api/packages/{pkgname}`
+ """
# Use strict=False in order to correctly manage case where \n is present in a string
info = json.loads(get_url_body(self.package_info_url), strict=False)
# Arrange versions list as a new dict with `version` as key
@@ -92,10 +105,18 @@
latest = self.info()["latest"]
return latest["version"]
+ def get_metadata_authority(self) -> MetadataAuthority:
+ """Return the metadata authority for pub.dev: a forge-type authority whose
+ url is `PUBDEV_BASE_URL`.
+ """
+ return MetadataAuthority(
+ type=MetadataAuthorityType.FORGE,
+ url=self.PUBDEV_BASE_URL,
+ )
+
def get_package_info(self, version: str) -> Iterator[Tuple[str, PubDevPackageInfo]]:
"""Get release name and package information from version
- Package info comes from extrinsic metadata (from self.info())
+ Package info comes from extrinsic metadata (from self.info()).
+ `directory_extrinsic_metadata` is populated with a single entry in the
+ `original-artifacts-json` format.
Args:
version: Package version (e.g: "0.1.0")
@@ -121,6 +142,13 @@
else:
author = EMPTY_AUTHOR
+ # Artifact extrinsic metadata
+ artifact: Dict[str, Any] = {
+ "checksums": checksums,
+ "filename": filename,
+ "url": url,
+ }
+
p_info = PubDevPackageInfo(
name=name,
filename=filename,
@@ -129,6 +157,12 @@
last_modified=last_modified,
author=author,
checksums=checksums,
+ directory_extrinsic_metadata=[
+ RawExtrinsicMetadataCore(
+ format="original-artifacts-json",
+ metadata=json.dumps([artifact]).encode(),
+ ),
+ ],
)
yield release_name(version), p_info
diff --git a/swh/loader/package/pubdev/tests/test_pubdev.py b/swh/loader/package/pubdev/tests/test_pubdev.py
--- a/swh/loader/package/pubdev/tests/test_pubdev.py
+++ b/swh/loader/package/pubdev/tests/test_pubdev.py
@@ -3,21 +3,31 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import json
+
import pytest
+from swh.loader.package import __version__
from swh.loader.package.pubdev.loader import PubDevLoader
from swh.loader.package.utils import EMPTY_AUTHOR
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
ObjectType,
Person,
+ RawExtrinsicMetadata,
Release,
Snapshot,
SnapshotBranch,
TargetType,
TimestampWithTimezone,
)
+from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID
+from swh.model.swhids import ObjectType as OType
+from swh.storage.interface import PagedResult
EXPECTED_PACKAGES = [
{
@@ -323,3 +333,75 @@
swh_storage,
"http://nowhere/api/packages/42",
)
+
+
+def test_pubdev_loader_raw_extrinsic_metadata(
+ datadir, requests_mock_datadir, swh_storage
+):
+
+ loader = PubDevLoader(
+ swh_storage,
+ url=EXPECTED_PACKAGES[0]["url"],
+ )
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+
+ expected_release_id = "1e2e7226ac9136f2eb7ce28f32ca08fff28590b1"
+
+ release = swh_storage.release_get([hash_to_bytes(expected_release_id)])[0]
+
+ release_swhid = CoreSWHID(
+ object_type=OType.RELEASE, object_id=hash_to_bytes(expected_release_id)
+ )
+ directory_swhid = ExtendedSWHID(
+ object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
+ )
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE,
+ url="https://pub.dev/",
+ )
+
+ expected_metadata = [
+ RawExtrinsicMetadata(
+ target=directory_swhid,
+ authority=metadata_authority,
+ fetcher=MetadataFetcher(
+ name="swh.loader.package.pubdev.loader.PubDevLoader",
+ version=__version__,
+ metadata=None,
+ ),
+ discovery_date=loader.visit_date,
+ format="original-artifacts-json",
+ metadata=json.dumps(
+ [
+ {
+ "checksums": {
+ "sha256": "ca6149c2bb566b07beaf731930ade8b77fad86055b3f37b6eb2f17aca2fbc1b1", # noqa: B950
+ },
+ "filename": "Autolinker-0.1.1.tar.gz",
+ "url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz", # noqa: B950
+ }
+ ]
+ ).encode(),
+ origin=EXPECTED_PACKAGES[0]["url"],
+ release=release_swhid,
+ ),
+ ]
+
+ raw = swh_storage.raw_extrinsic_metadata_get(directory_swhid, metadata_authority)
+ expected_raw = PagedResult(
+ next_page_token=None,
+ results=expected_metadata,
+ )
+
+ assert swh_storage.raw_extrinsic_metadata_get(
+ directory_swhid,
+ metadata_authority,
+ ) == PagedResult(
+ next_page_token=None,
+ results=expected_metadata,
+ )
+
+ for result in expected_raw.results:
+ assert result in raw.results

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 10:09 AM (19 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223889

Event Timeline