diff --git a/swh/loader/package/cpan/loader.py b/swh/loader/package/cpan/loader.py index 37d3ee5..cfbcec9 100644 --- a/swh/loader/package/cpan/loader.py +++ b/swh/loader/package/cpan/loader.py @@ -1,135 +1,180 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime import logging from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple import attr import iso8601 from packaging.version import parse as parse_version - -from swh.loader.package.loader import BasePackageInfo, PackageLoader -from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name -from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone +from requests import HTTPError + +from swh.loader.package.loader import ( + BasePackageInfo, + PackageLoader, + RawExtrinsicMetadataCore, +) +from swh.loader.package.utils import EMPTY_AUTHOR, Person, get_url_body, release_name +from swh.model.model import ( + MetadataAuthority, + MetadataAuthorityType, + ObjectType, + Release, + Sha1Git, + TimestampWithTimezone, +) from swh.storage.interface import StorageInterface logger = logging.getLogger(__name__) @attr.s class CpanPackageInfo(BasePackageInfo): name = attr.ib(type=str) """Name of the package""" version = attr.ib(type=str) """Current version""" last_modified = attr.ib(type=datetime) """File last modified date as release date.""" author = attr.ib(type=Person) """Author""" class CpanLoader(PackageLoader[CpanPackageInfo]): visit_type = "cpan" + EXTRINSIC_METADATA_URL_PATTERN = "{api_base_url}/release/{author}/{release_name}" + def __init__( self, storage: StorageInterface, url: str, api_base_url: str, artifacts: List[Dict[str, Any]], module_metadata: List[Dict[str, Any]], **kwargs, ): super().__init__(storage=storage, url=url, **kwargs) self.url = url self.api_base_url = api_base_url self.artifacts: Dict[str, Dict] = { artifact["version"]: {k: v for k, v in artifact.items() if k != "version"} for artifact in artifacts } self.module_metadata: Dict[str, Dict] = { meta["version"]: meta for meta in module_metadata } + def get_metadata_authority(self): + return MetadataAuthority( + type=MetadataAuthorityType.FORGE, + url="https://metacpan.org/", + ) + def get_versions(self) -> Sequence[str]: """Get all released versions of a Perl package Returns: A sequence of versions Example:: ["0.1.1", "0.10.2"] """ versions = list(self.artifacts.keys()) versions.sort(key=parse_version) return versions def get_default_version(self) -> str: """Get the newest release version of a Perl package Returns: A string representing a version Example:: "0.10.2" """ return self.get_versions()[-1] def get_package_info(self, version: str) -> Iterator[Tuple[str, CpanPackageInfo]]: """Get release name and package information from version Args: version: Package version (e.g: "0.1.0") Returns: Iterator of tuple (release_name, p_info) """ artifact = self.artifacts[version] metadata = self.module_metadata[version] last_modified = iso8601.parse_date(metadata["date"]) author = ( Person.from_fullname(metadata["author"].encode()) if metadata["author"] else EMPTY_AUTHOR ) + try: + extrinsic_metadata_url = self.EXTRINSIC_METADATA_URL_PATTERN.format( + api_base_url=self.api_base_url, + author=metadata["cpan_author"], + release_name=metadata["release_name"], + ) + version_extrinsic_metadata = get_url_body(extrinsic_metadata_url) + except HTTPError: + logger.warning( + "Could not fetch extrinsic_metadata for module %s version %s", + metadata["name"], + version, + ) + version_extrinsic_metadata = None + + directory_extrinsic_metadata = [] + if version_extrinsic_metadata: + directory_extrinsic_metadata.append( + RawExtrinsicMetadataCore( + format="cpan-release-json", + metadata=version_extrinsic_metadata, + ) + ) + p_info = CpanPackageInfo( name=metadata["name"], filename=artifact["filename"], url=artifact["url"], version=version, last_modified=last_modified, author=author, checksums=artifact["checksums"], + directory_extrinsic_metadata=directory_extrinsic_metadata, ) yield release_name(version), p_info def build_release( self, p_info: CpanPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: message = ( f"Synthetic release for Perl source package {p_info.name} " f"version {p_info.version}\n" ) return Release( name=p_info.version.encode(), author=p_info.author, date=TimestampWithTimezone.from_datetime(p_info.last_modified), message=message.encode(), target_type=ObjectType.DIRECTORY, target=directory, synthetic=True, ) diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.01 b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.01 new file mode 100644 index 0000000..92b2ead --- /dev/null +++ b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.01 @@ -0,0 +1,89 @@ +{ + "total" : 1, + "took" : 2, + "release" : { + "date" : "2011-06-05T18:44:02", + "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz", + "status" : "cpan", + "deprecated" : false, + "archive" : "Internals-CountObjects-0.01.tar.gz", + "stat" : { + "mode" : 33188, + "mtime" : 1307299442, + "gid" : 1009, + "uid" : 1009, + "size" : 52541 + }, + "version_numified" : 0.01, + "abstract" : "Report all allocated perl objects", + "maturity" : "released", + "checksum_sha256" : "c4904fc34954f18783b15bc1424eda032090fb20efa98d8f2e42c3d3ad153376", + "id" : "_YhX3DZlS7XliSAheYHR8UibjPg", + "metadata" : { + "no_index" : { + "directory" : [ + "t", + "xt", + "inc", + "local", + "perl5", + "fatlib", + "example", + "blib", + "examples", + "eg" + ] + }, + "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400, CPAN::Meta::Converter version 2.150005", + "license" : [ + "perl_5" + ], + "release_status" : "stable", + "name" : "Internals-CountObjects", + "author" : [ + "Josh Jore " + ], + "dynamic_config" : "0", + "abstract" : "Report all allocated perl objects", + "prereqs" : { + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "6.31" + } + }, + "build" : { + "requires" : {} + } + }, + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "version" : "0.01" + }, + "tests" : { + "na" : 0, + "pass" : 115, + "fail" : 0, + "unknown" : 34 + }, + "dependency" : { + "relationship" : "requires", + "version" : "6.31", + "module" : "ExtUtils::MakeMaker", + "phase" : "configure" + }, + "main_module" : "Internals::CountObjects", + "authorized" : true, + "license" : "perl_5", + "provides" : "Internals::CountObjects", + "distribution" : "Internals-CountObjects", + "checksum_md5" : "f178444dad69f126db79ebd76c4e95bd", + "changes_file" : "", + "version" : "0.01", + "author" : "JJORE", + "name" : "Internals-CountObjects-0.01", + "first" : true, + "resources" : {} + } +} diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.05 b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.05 new file mode 100644 index 0000000..325b4ce --- /dev/null +++ b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.05 @@ -0,0 +1,109 @@ +{ + "release" : { + "provides" : "Internals::CountObjects", + "distribution" : "Internals-CountObjects", + "authorized" : true, + "license" : "perl_5", + "author" : "JJORE", + "name" : "Internals-CountObjects-0.05", + "first" : false, + "resources" : { + "bugtracker" : { + "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects", + "mailto" : "bug-Internals-CountObjects@rt.cpan.org" + }, + "repository" : { + "web" : "http://github.com/jbenjore/Internals-CountObjects", + "url" : "git://github.com/jbenjore/Internals-CountObjects.git", + "type" : "git" + }, + "homepage" : "http://search.cpan.org/dist/Internals-CountObjects" + }, + "checksum_md5" : "ee751810f504b5a463397f22634467a7", + "version" : "0.05", + "changes_file" : "", + "stat" : { + "size" : 54473, + "mtime" : 1307769811, + "uid" : 1009, + "gid" : 1009, + "mode" : 33188 + }, + "status" : "latest", + "archive" : "Internals-CountObjects-0.05.tar.gz", + "deprecated" : false, + "date" : "2011-06-11T05:23:31", + "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz", + "metadata" : { + "license" : [ + "perl_5" + ], + "release_status" : "stable", + "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400, CPAN::Meta::Converter version 2.150005", + "no_index" : { + "directory" : [ + "t", + "xt", + "inc", + "local", + "perl5", + "fatlib", + "example", + "blib", + "examples", + "eg" + ] + }, + "version" : "0.05", + "meta-spec" : { + "version" : "2", + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec" + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::CBuilder" : "0" + } + } + }, + "abstract" : "Report all allocated perl objects", + "resources" : { + "repository" : { + "url" : "git://github.com/jbenjore/Internals-CountObjects.git", + "web" : "http://github.com/jbenjore/Internals-CountObjects", + "type" : "git" + }, + "homepage" : "http://search.cpan.org/dist/Internals-CountObjects", + "bugtracker" : { + "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects", + "mailto" : "bug-Internals-CountObjects@rt.cpan.org" + } + }, + "author" : [ + "Josh Jore " + ], + "dynamic_config" : 0, + "name" : "Internals-CountObjects" + }, + "dependency" : { + "phase" : "build", + "relationship" : "requires", + "version" : "0", + "module" : "ExtUtils::CBuilder" + }, + "tests" : { + "pass" : 491, + "fail" : 0, + "unknown" : 1, + "na" : 0 + }, + "main_module" : "Internals::CountObjects", + "abstract" : "Report all allocated perl objects", + "version_numified" : 0.05, + "maturity" : "released", + "id" : "D3RywifomVjSA3VV8eM_huWKfrk", + "checksum_sha256" : "bbf65021207a7a51c8f8475bc25c4735f49d62744a75d33595e9720731b2b02f" + }, + "took" : 2, + "total" : 1 +} diff --git a/swh/loader/package/cpan/tests/test_cpan.py b/swh/loader/package/cpan/tests/test_cpan.py index afd809f..f865372 100644 --- a/swh/loader/package/cpan/tests/test_cpan.py +++ b/swh/loader/package/cpan/tests/test_cpan.py @@ -1,156 +1,209 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # flake8: noqa: B950 +import json +from pathlib import Path + import pytest from swh.loader.package import __version__ from swh.loader.package.cpan.loader import CpanLoader from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes from swh.model.model import ( Person, + RawExtrinsicMetadata, Release, Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone, ) +from swh.model.model import MetadataFetcher from swh.model.model import ObjectType as ModelObjectType +from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType ORIGIN_URL = "https://metacpan.org/dist/Internals-CountObjects" API_BASE_URL = "https://fastapi.metacpan.org/v1" ORIGIN_ARTIFACTS = [ { "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz", "filename": "CountObjects-0.05.tar.gz", "version": "0.05", "length": 632, "checksums": { "sha256": "e0ecf6ab4873fa55ff74da22a3c4ae0ab6a1409635c9cd2d6059abbb32be3a6a" }, }, { "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz", "filename": "CountObjects-0.01.tar.gz", "version": "0.01", "length": 453, "checksums": { "sha256": "a368004ab98c5860a8fd87e0a4c44e4ee2d1b95d9b13597519a0e644c167468a" }, }, ] ORIGIN_MODULE_METADATA = [ { "name": "Internals-CountObjects", "version": "0.05", "author": "Josh Jore ", "cpan_author": "JJORE", "date": "2011-06-11T05:23:31", "release_name": "Internals-CountObjects-0.05", }, { "name": "Internals-CountObjects", "version": "0.01", "author": "Josh Jore ", "cpan_author": "JJORE", "date": "2011-06-05T18:44:02", "release_name": "Internals-CountObjects-0.01", }, ] +@pytest.fixture +def head_release_original_artifacts_metadata(): + return json.dumps( + [{k: v for k, v in ORIGIN_ARTIFACTS[0].items() if k != "version"}] + ).encode() + + +@pytest.fixture +def head_release_extrinsic_metadata(datadir): + return Path( + datadir, + "https_fastapi.metacpan.org", + "v1_release_JJORE_Internals-CountObjects-0.05", + ).read_bytes() + + @pytest.fixture def cpan_loader(requests_mock_datadir, swh_storage): return CpanLoader( swh_storage, url=ORIGIN_URL, api_base_url=API_BASE_URL, artifacts=ORIGIN_ARTIFACTS, module_metadata=ORIGIN_MODULE_METADATA, ) def test_get_versions(cpan_loader): assert cpan_loader.get_versions() == ["0.01", "0.05"] def test_get_default_version(cpan_loader): assert cpan_loader.get_default_version() == "0.05" -def test_cpan_loader_load_multiple_version(cpan_loader): +def test_cpan_loader_load_multiple_version( + cpan_loader, + head_release_original_artifacts_metadata, + head_release_extrinsic_metadata, +): load_status = cpan_loader.load() assert load_status["status"] == "eventful" assert load_status["snapshot_id"] is not None expected_snapshot_id = "848ee8d69d33481c88ab81f6794f6504190f011f" expected_head_release = "07382fd255ec0fc293b92aeb7e68b3fe31c174f9" assert expected_snapshot_id == load_status["snapshot_id"] expected_snapshot = Snapshot( id=hash_to_bytes(load_status["snapshot_id"]), branches={ b"releases/0.01": SnapshotBranch( target=hash_to_bytes("e73aced4cc3d56b32a328d3248b25b052f029df4"), target_type=TargetType.RELEASE, ), b"releases/0.05": SnapshotBranch( target=hash_to_bytes(expected_head_release), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( target=b"releases/0.05", target_type=TargetType.ALIAS, ), }, ) storage = cpan_loader.storage check_snapshot(expected_snapshot, storage) stats = get_stats(storage) assert { "content": 2, "directory": 4, "origin": 1, "origin_visit": 1, "release": 2, "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats head_release = storage.release_get([hash_to_bytes(expected_head_release)])[0] assert head_release == Release( name=b"0.05", message=b"Synthetic release for Perl source package Internals-CountObjects version 0.05\n", target=hash_to_bytes("af3f6a43eaf4b26dbcadb1101e8d81db6d6151e0"), target_type=ModelObjectType.DIRECTORY, synthetic=True, author=Person( fullname=b"Josh Jore ", name=b"Josh Jore", email=b"jjore@cpan.org", ), date=TimestampWithTimezone.from_iso8601("2011-06-11T05:23:31+00:00"), id=hash_to_bytes(expected_head_release), ) assert_last_visit_matches( storage, url=ORIGIN_URL, status="full", type="cpan", snapshot=expected_snapshot.id, ) + + release_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=head_release.id) + directory_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=head_release.target + ) + expected_metadata = [ + RawExtrinsicMetadata( + target=directory_swhid, + authority=cpan_loader.get_metadata_authority(), + fetcher=MetadataFetcher( + name="swh.loader.package.cpan.loader.CpanLoader", + version=__version__, + ), + discovery_date=cpan_loader.visit_date, + format="cpan-release-json", + metadata=head_release_extrinsic_metadata, + origin=ORIGIN_URL, + release=release_swhid, + ), + ] + assert ( + cpan_loader.storage.raw_extrinsic_metadata_get( + directory_swhid, + cpan_loader.get_metadata_authority(), + ).results + == expected_metadata + )