diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -13,6 +13,8 @@
 from pkginfo import UnpackedSDist
 
 from swh.model.model import (
+    MetadataAuthority,
+    MetadataAuthorityType,
     Person,
     Sha1Git,
     TimestampWithTimezone,
@@ -20,7 +22,11 @@
     RevisionType,
 )
 
-from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.loader import (
+    BasePackageInfo,
+    PackageLoader,
+    RawExtrinsicMetadataCore,
+)
 from swh.loader.package.utils import api_info, release_name, EMPTY_AUTHOR
 
 logger = logging.getLogger(__name__)
@@ -64,7 +70,8 @@
 
         """
         if not self._info:
-            self._info = json.loads(api_info(self.provider_url))
+            self._raw_info = api_info(self.provider_url)
+            self._info = json.loads(self._raw_info)
         return self._info
 
     def get_versions(self) -> Sequence[str]:
@@ -73,6 +80,27 @@
     def get_default_version(self) -> str:
         return self.info["info"]["version"]
 
+    def get_metadata_authority(self) -> MetadataAuthority:
+        """Return the FORGE authority for the scheme and netloc of ``self.url``."""
+        p_url = urlparse(self.url)
+        return MetadataAuthority(
+            type=MetadataAuthorityType.FORGE,
+            url=f"{p_url.scheme}://{p_url.netloc}/",
+            metadata={},
+        )
+
+    def get_extrinsic_snapshot_metadata(self):
+        """Return the raw project API response as snapshot-level metadata."""
+        # ``_raw_info`` is only assigned by the ``info`` property, so evaluate
+        # the property first; otherwise calling this hook before any other
+        # accessor raises AttributeError.
+        _ = self.info
+        return [
+            RawExtrinsicMetadataCore(
+                format="pypi-project-json", metadata=self._raw_info,
+            ),
+        ]
+
     def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]:
         res = []
         for meta in self.info["releases"][version]:
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -13,9 +13,22 @@
 from swh.core.tarball import uncompress
 from swh.core.pytest_plugin import requests_mock_datadir_factory
 
-from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import SWHID
+from swh.model.model import (
+    MetadataAuthority,
+    MetadataAuthorityType,
+    MetadataFetcher,
+    MetadataTargetType,
+    Person,
+    RawExtrinsicMetadata,
+    Snapshot,
+    SnapshotBranch,
+    TargetType,
+)
+from swh.storage.interface import PagedResult
 
+from swh.loader.package import __version__
 from swh.loader.package.pypi.loader import (
     PyPILoader,
     pypi_api_url,
@@ -31,6 +44,15 @@
 )
 
 
+@pytest.fixture
+def _0805nexter_api_info(datadir) -> bytes:
+    """Return the bytes of the ``pypi_0805nexter_json`` data file."""
+    with open(
+        os.path.join(datadir, "https_pypi.org", "pypi_0805nexter_json"), "rb",
+    ) as f:
+        return f.read()
+
+
 def test_author_basic():
     data = {
         "author": "i-am-groot",
@@ -315,6 +337,42 @@
 )
 
 
+def test_snapshot_metadata(swh_config, requests_mock_datadir, _0805nexter_api_info):
+    """Loading a project attaches its raw API JSON to the snapshot as metadata."""
+    url = "https://pypi.org/project/0805nexter"
+    loader = PyPILoader(url)
+
+    actual_load_status = loader.load()
+    assert actual_load_status["status"] == "eventful"
+    assert actual_load_status["snapshot_id"] is not None
+
+    snapshot_swhid = SWHID(
+        object_type="snapshot", object_id=hash_to_hex(actual_load_status["snapshot_id"])
+    )
+    metadata_authority = MetadataAuthority(
+        type=MetadataAuthorityType.FORGE, url="https://pypi.org/",
+    )
+    expected_metadata = [
+        RawExtrinsicMetadata(
+            type=MetadataTargetType.SNAPSHOT,
+            id=snapshot_swhid,
+            authority=metadata_authority,
+            fetcher=MetadataFetcher(
+                name="swh.loader.package.pypi.loader.PyPILoader", version=__version__,
+            ),
+            discovery_date=loader.visit_date,
+            format="pypi-project-json",
+            metadata=_0805nexter_api_info,
+            origin=url,
+        )
+    ]
+    assert loader.storage.raw_extrinsic_metadata_get(
+        type=MetadataTargetType.SNAPSHOT,
+        id=snapshot_swhid,
+        authority=metadata_authority,
+    ) == PagedResult(next_page_token=None, results=expected_metadata,)
+
+
 def test_visit_with_missing_artifact(swh_config, requests_mock_datadir_missing_one):
     """Load a pypi project with some missing artifacts ends up with 1 snapshot
 