diff --git a/swh/loader/package/pubdev/loader.py b/swh/loader/package/pubdev/loader.py --- a/swh/loader/package/pubdev/loader.py +++ b/swh/loader/package/pubdev/loader.py @@ -3,12 +3,16 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json -from typing import Dict, Iterator, Optional, Sequence, Tuple +from typing import Any, Dict, Iterator, Optional, Sequence, Tuple import attr from packaging.version import parse as parse_version -from swh.loader.package.loader import BasePackageInfo, PackageLoader +from swh.loader.package.loader import ( + BasePackageInfo, + PackageLoader, + RawExtrinsicMetadataCore, +) from swh.loader.package.utils import ( EMPTY_AUTHOR, Person, @@ -16,7 +20,14 @@ get_url_body, release_name, ) -from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone +from swh.model.model import ( + MetadataAuthority, + MetadataAuthorityType, + ObjectType, + Release, + Sha1Git, + TimestampWithTimezone, +) from swh.storage.interface import StorageInterface @@ -57,7 +68,9 @@ @cached_method def info(self) -> Dict: - """Return the project metadata information (fetched from pub.dev registry)""" + """Return the project metadata information (fetched from pub.dev registry) with + url pattern `{PUBDEV_BASE_URL}api/packages/{pkgname}` + """ # Use strict=False in order to correctly manage case where \n is present in a string info = json.loads(get_url_body(self.package_info_url), strict=False) # Arrange versions list as a new dict with `version` as key @@ -92,10 +105,22 @@ latest = self.info()["latest"] return latest["version"] + def get_metadata_authority(self): + return MetadataAuthority( + type=MetadataAuthorityType.FORGE, + url=self.PUBDEV_BASE_URL, + ) + def get_package_info(self, version: str) -> Iterator[Tuple[str, PubDevPackageInfo]]: """Get release name and package information from version - Package info comes from extrinsic metadata (from self.info()) + Package 
info comes from extrinsic metadata (from self.info()); + `directory_extrinsic_metadata` is populated with two formats, + `original-artifacts-json` and `pubdev-pubspec-json`. + + The `pubdev-pubspec-json` format is a JSON list holding one object: the pubspec + of the package as returned by the API endpoint. See `pubspec` specifications + https://dart.dev/tools/pub/pubspec Args: version: Package version (e.g: "0.1.0") @@ -121,6 +146,15 @@ else: author = EMPTY_AUTHOR + # Artifact extrinsic metadata + artifact: Dict[str, Any] = { + "checksums": checksums, + "filename": filename, + "url": url, + } + # Pubdev metadata (see pubspec specifications https://dart.dev/tools/pub/pubspec ) + pubdev_metadata: Dict[str, Any] = v.get("pubspec", {}) + p_info = PubDevPackageInfo( name=name, filename=filename, @@ -129,6 +163,16 @@ last_modified=last_modified, author=author, checksums=checksums, + directory_extrinsic_metadata=[ + RawExtrinsicMetadataCore( + format="original-artifacts-json", + metadata=json.dumps([artifact]).encode(), + ), + RawExtrinsicMetadataCore( + format="pubdev-pubspec-json", + metadata=json.dumps([pubdev_metadata]).encode(), + ), + ], ) yield release_name(version), p_info diff --git a/swh/loader/package/pubdev/tests/test_pubdev.py b/swh/loader/package/pubdev/tests/test_pubdev.py --- a/swh/loader/package/pubdev/tests/test_pubdev.py +++ b/swh/loader/package/pubdev/tests/test_pubdev.py @@ -3,21 +3,31 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import json + import pytest +from swh.loader.package import __version__ from swh.loader.package.pubdev.loader import PubDevLoader from swh.loader.package.utils import EMPTY_AUTHOR from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes from swh.model.model import ( + MetadataAuthority, + MetadataAuthorityType, + MetadataFetcher, ObjectType, Person, + RawExtrinsicMetadata, Release, 
Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone, ) +from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID +from swh.model.swhids import ObjectType as OType +from swh.storage.interface import PagedResult EXPECTED_PACKAGES = [ { @@ -323,3 +333,90 @@ swh_storage, "http://nowhere/api/packages/42", ) + + +def test_pubdev_loader_raw_extrinsic_metadata( + datadir, requests_mock_datadir, swh_storage +): + + loader = PubDevLoader( + swh_storage, + url=EXPECTED_PACKAGES[0]["url"], + ) + actual_load_status = loader.load() + assert actual_load_status["status"] == "eventful" + assert actual_load_status["snapshot_id"] is not None + + expected_release_id = "1e2e7226ac9136f2eb7ce28f32ca08fff28590b1" + + release = swh_storage.release_get([hash_to_bytes(expected_release_id)])[0] + + release_swhid = CoreSWHID( + object_type=OType.RELEASE, object_id=hash_to_bytes(expected_release_id) + ) + directory_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=release.target + ) + metadata_authority = MetadataAuthority( + type=MetadataAuthorityType.FORGE, + url="https://pub.dev/", + ) + + expected_metadata = [ + RawExtrinsicMetadata( + target=directory_swhid, + authority=metadata_authority, + fetcher=MetadataFetcher( + name="swh.loader.package.pubdev.loader.PubDevLoader", + version=__version__, + metadata=None, + ), + discovery_date=loader.visit_date, + format="original-artifacts-json", + metadata=json.dumps( + [ + { + "checksums": { + "sha256": "ca6149c2bb566b07beaf731930ade8b77fad86055b3f37b6eb2f17aca2fbc1b1", # noqa: B950 + }, + "filename": "Autolinker-0.1.1.tar.gz", + "url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz", # noqa: B950 + } + ] + ).encode(), + origin=EXPECTED_PACKAGES[0]["url"], + release=release_swhid, + ), + RawExtrinsicMetadata( + target=directory_swhid, + authority=metadata_authority, + fetcher=MetadataFetcher( + name="swh.loader.package.pubdev.loader.PubDevLoader", + version=__version__, + 
metadata=None, + ), + discovery_date=loader.visit_date, + format="pubdev-pubspec-json", + metadata=json.dumps( + [ + { + "version": "0.1.1", + "homepage": "https://github.com/hackcave", + "description": "Port of Autolinker.js to dart", + "name": "Autolinker", + "author": "hackcave ", + } + ] + ).encode(), + origin=EXPECTED_PACKAGES[0]["url"], + release=release_swhid, + ), + ] + + raw = swh_storage.raw_extrinsic_metadata_get(directory_swhid, metadata_authority) + expected_raw = PagedResult( + next_page_token=None, + results=expected_metadata, + ) + for result in raw.results: + assert result in expected_raw.results