Page Menu | Home | Software Heritage

D3677.id12947.diff
No One | Temporary

D3677.id12947.diff

diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -13,6 +13,8 @@
from pkginfo import UnpackedSDist
from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
Person,
Sha1Git,
TimestampWithTimezone,
@@ -20,7 +22,11 @@
RevisionType,
)
-from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.loader import (
+ BasePackageInfo,
+ PackageLoader,
+ RawExtrinsicMetadataCore,
+)
from swh.loader.package.utils import api_info, release_name, EMPTY_AUTHOR
logger = logging.getLogger(__name__)
@@ -64,7 +70,8 @@
"""
if not self._info:
- self._info = json.loads(api_info(self.provider_url))
+ self._raw_info = api_info(self.provider_url)
+ self._info = json.loads(self._raw_info)
return self._info
def get_versions(self) -> Sequence[str]:
@@ -73,6 +80,21 @@
def get_default_version(self) -> str:
return self.info["info"]["version"]
+ def get_metadata_authority(self):
+ p_url = urlparse(self.url)
+ return MetadataAuthority(
+ type=MetadataAuthorityType.FORGE,
+ url=f"{p_url.scheme}://{p_url.netloc}/",
+ metadata={},
+ )
+
+ def get_extrinsic_snapshot_metadata(self):
+ return [
+ RawExtrinsicMetadataCore(
+ format="pypi-project-json", metadata=self._raw_info,
+ ),
+ ]
+
def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]:
res = []
for meta in self.info["releases"][version]:
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -13,9 +13,22 @@
from swh.core.tarball import uncompress
from swh.core.pytest_plugin import requests_mock_datadir_factory
-from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import SWHID
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ Person,
+ RawExtrinsicMetadata,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+)
+from swh.storage.interface import PagedResult
+from swh.loader.package import __version__
from swh.loader.package.pypi.loader import (
PyPILoader,
pypi_api_url,
@@ -31,6 +44,14 @@
)
+@pytest.fixture
+def _0805nexter_api_info(datadir) -> bytes:
+ with open(
+ os.path.join(datadir, "https_pypi.org", "pypi_0805nexter_json"), "rb",
+ ) as f:
+ return f.read()
+
+
def test_author_basic():
data = {
"author": "i-am-groot",
@@ -315,6 +336,41 @@
)
+def test_snapshot_metadata(swh_config, requests_mock_datadir, _0805nexter_api_info):
+ url = "https://pypi.org/project/0805nexter"
+ loader = PyPILoader(url)
+
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+
+ snapshot_swhid = SWHID(
+ object_type="snapshot", object_id=hash_to_hex(actual_load_status["snapshot_id"])
+ )
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url="https://pypi.org/",
+ )
+ expected_metadata = [
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ fetcher=MetadataFetcher(
+ name="swh.loader.package.pypi.loader.PyPILoader", version=__version__,
+ ),
+ discovery_date=loader.visit_date,
+ format="pypi-project-json",
+ metadata=_0805nexter_api_info,
+ origin=url,
+ )
+ ]
+ assert loader.storage.raw_extrinsic_metadata_get(
+ type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ ) == PagedResult(next_page_token=None, results=expected_metadata,)
+
+
def test_visit_with_missing_artifact(swh_config, requests_mock_datadir_missing_one):
"""Load a pypi project with some missing artifacts ends up with 1 snapshot

File Metadata

Mime Type
text/plain
Expires
Wed, Dec 18, 10:44 PM (22 h, 30 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220668

Event Timeline