diff --git a/swh/loader/package/cpan/loader.py b/swh/loader/package/cpan/loader.py
--- a/swh/loader/package/cpan/loader.py
+++ b/swh/loader/package/cpan/loader.py
@@ -10,10 +10,22 @@
 import attr
 import iso8601
 from packaging.version import parse as parse_version
-
-from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name
-from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
+from requests import HTTPError
+
+from swh.loader.package.loader import (
+    BasePackageInfo,
+    PackageLoader,
+    RawExtrinsicMetadataCore,
+)
+from swh.loader.package.utils import EMPTY_AUTHOR, Person, get_url_body, release_name
+from swh.model.model import (
+    MetadataAuthority,
+    MetadataAuthorityType,
+    ObjectType,
+    Release,
+    Sha1Git,
+    TimestampWithTimezone,
+)
 from swh.storage.interface import StorageInterface
 
 logger = logging.getLogger(__name__)
@@ -38,6 +50,8 @@
 class CpanLoader(PackageLoader[CpanPackageInfo]):
     visit_type = "cpan"
 
+    EXTRINSIC_METADATA_URL_PATTERN = "{api_base_url}/release/{author}/{release_name}"
+
     def __init__(
         self,
         storage: StorageInterface,
@@ -59,6 +73,13 @@
             meta["version"]: meta for meta in module_metadata
         }
 
+    def get_metadata_authority(self) -> MetadataAuthority:
+        """Return the forge-type metadata authority for https://metacpan.org/."""
+        return MetadataAuthority(
+            type=MetadataAuthorityType.FORGE,
+            url="https://metacpan.org/",
+        )
+
     def get_versions(self) -> Sequence[str]:
         """Get all released versions of a Perl package
 
@@ -104,6 +125,31 @@
             else EMPTY_AUTHOR
         )
 
+        # Build the URL outside the ``try`` so only the HTTP call is guarded.
+        extrinsic_metadata_url = self.EXTRINSIC_METADATA_URL_PATTERN.format(
+            api_base_url=self.api_base_url,
+            author=metadata["cpan_author"],
+            release_name=metadata["release_name"],
+        )
+        directory_extrinsic_metadata = []
+        try:
+            version_extrinsic_metadata = get_url_body(extrinsic_metadata_url)
+        except HTTPError:
+            # Best effort: the release is still loaded, just without this metadata.
+            logger.warning(
+                "Could not fetch extrinsic_metadata for module %s version %s",
+                metadata["name"],
+                version,
+            )
+        else:
+            if version_extrinsic_metadata:
+                directory_extrinsic_metadata.append(
+                    RawExtrinsicMetadataCore(
+                        format="cpan-release-json",
+                        metadata=version_extrinsic_metadata,
+                    )
+                )
+
         p_info = CpanPackageInfo(
             name=metadata["name"],
             filename=artifact["filename"],
@@ -112,6 +158,7 @@
             last_modified=last_modified,
             author=author,
             checksums=artifact["checksums"],
+            directory_extrinsic_metadata=directory_extrinsic_metadata,
         )
         yield release_name(version), p_info
 
diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.01 b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.01
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.01
@@ -0,0 +1,89 @@
+{
+   "total" : 1,
+   "took" : 2,
+   "release" : {
+      "date" : "2011-06-05T18:44:02",
+      "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
+      "status" : "cpan",
+      "deprecated" : false,
+      "archive" : "Internals-CountObjects-0.01.tar.gz",
+      "stat" : {
+         "mode" : 33188,
+         "mtime" : 1307299442,
+         "gid" : 1009,
+         "uid" : 1009,
+         "size" : 52541
+      },
+      "version_numified" : 0.01,
+      "abstract" : "Report all allocated perl objects",
+      "maturity" : "released",
+      "checksum_sha256" : "c4904fc34954f18783b15bc1424eda032090fb20efa98d8f2e42c3d3ad153376",
+      "id" : "_YhX3DZlS7XliSAheYHR8UibjPg",
+      "metadata" : {
+         "no_index" : {
+            "directory" : [
+               "t",
+               "xt",
+               "inc",
+               "local",
+               "perl5",
+               "fatlib",
+               "example",
+               "blib",
+               "examples",
+               "eg"
+            ]
+         },
+         "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400, CPAN::Meta::Converter version 2.150005",
+         "license" : [
+            "perl_5"
+         ],
+         "release_status" : "stable",
+         "name" : "Internals-CountObjects",
+         "author" : [
+            "Josh Jore "
+         ],
+         "dynamic_config" : "0",
+         "abstract" : "Report all allocated perl objects",
+         "prereqs" : {
+            "configure" : {
+               "requires" : {
+                  "ExtUtils::MakeMaker" : "6.31"
+               }
+            },
+            "build" : {
+               "requires" : {}
+            }
+         },
+         "meta-spec" : {
+            "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+            "version" : "2"
+         },
+         "version" : "0.01"
+      },
+      "tests" : {
+         "na" : 0,
+         "pass" : 115,
+         "fail" : 0,
+         "unknown" : 34
+      },
+      "dependency" : {
+         "relationship" : "requires",
+         "version" : "6.31",
+         "module" : "ExtUtils::MakeMaker",
+         "phase" : "configure"
+      },
+      "main_module" : "Internals::CountObjects",
+      "authorized" : true,
+      "license" : "perl_5",
+      "provides" : "Internals::CountObjects",
+      "distribution" : "Internals-CountObjects",
+      "checksum_md5" : "f178444dad69f126db79ebd76c4e95bd",
+      "changes_file" : "",
+      "version" : "0.01",
+      "author" : "JJORE",
+      "name" : "Internals-CountObjects-0.01",
+      "first" : true,
+      "resources" : {}
+   }
+}
diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.05 b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.05
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_JJORE_Internals-CountObjects-0.05
@@ -0,0 +1,109 @@
+{
+   "release" : {
+      "provides" : "Internals::CountObjects",
+      "distribution" : "Internals-CountObjects",
+      "authorized" : true,
+      "license" : "perl_5",
+      "author" : "JJORE",
+      "name" : "Internals-CountObjects-0.05",
+      "first" : false,
+      "resources" : {
+         "bugtracker" : {
+            "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects",
+            "mailto" : "bug-Internals-CountObjects@rt.cpan.org"
+         },
+         "repository" : {
+            "web" : "http://github.com/jbenjore/Internals-CountObjects",
+            "url" : "git://github.com/jbenjore/Internals-CountObjects.git",
+            "type" : "git"
+         },
+         "homepage" : "http://search.cpan.org/dist/Internals-CountObjects"
+      },
+      "checksum_md5" : "ee751810f504b5a463397f22634467a7",
+      "version" : "0.05",
+      "changes_file" : "",
+      "stat" : {
+         "size" : 54473,
+         "mtime" : 1307769811,
+         "uid" : 1009,
+         "gid" : 1009,
+         "mode" : 33188
+      },
+      "status" : "latest",
+      "archive" : "Internals-CountObjects-0.05.tar.gz",
+      "deprecated" : false,
+      "date" : "2011-06-11T05:23:31",
+      "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
+      "metadata" : {
+         "license" : [
+            "perl_5"
+         ],
+         "release_status" : "stable",
+         "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400, CPAN::Meta::Converter version 2.150005",
+         "no_index" : {
+            "directory" : [
+               "t",
+               "xt",
+               "inc",
+               "local",
+               "perl5",
+               "fatlib",
+               "example",
+               "blib",
+               "examples",
+               "eg"
+            ]
+         },
+         "version" : "0.05",
+         "meta-spec" : {
+            "version" : "2",
+            "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec"
+         },
+         "prereqs" : {
+            "build" : {
+               "requires" : {
+                  "ExtUtils::CBuilder" : "0"
+               }
+            }
+         },
+         "abstract" : "Report all allocated perl objects",
+         "resources" : {
+            "repository" : {
+               "url" : "git://github.com/jbenjore/Internals-CountObjects.git",
+               "web" : "http://github.com/jbenjore/Internals-CountObjects",
+               "type" : "git"
+            },
+            "homepage" : "http://search.cpan.org/dist/Internals-CountObjects",
+            "bugtracker" : {
+               "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects",
+               "mailto" : "bug-Internals-CountObjects@rt.cpan.org"
+            }
+         },
+         "author" : [
+            "Josh Jore "
+         ],
+         "dynamic_config" : 0,
+         "name" : "Internals-CountObjects"
+      },
+      "dependency" : {
+         "phase" : "build",
+         "relationship" : "requires",
+         "version" : "0",
+         "module" : "ExtUtils::CBuilder"
+      },
+      "tests" : {
+         "pass" : 491,
+         "fail" : 0,
+         "unknown" : 1,
+         "na" : 0
+      },
+      "main_module" : "Internals::CountObjects",
+      "abstract" : "Report all allocated perl objects",
+      "version_numified" : 0.05,
+      "maturity" : "released",
+      "id" : "D3RywifomVjSA3VV8eM_huWKfrk",
+      "checksum_sha256" : "bbf65021207a7a51c8f8475bc25c4735f49d62744a75d33595e9720731b2b02f"
+   },
+   "took" : 2,
+   "total" : 1
+}
diff --git a/swh/loader/package/cpan/tests/test_cpan.py b/swh/loader/package/cpan/tests/test_cpan.py
--- a/swh/loader/package/cpan/tests/test_cpan.py
+++ b/swh/loader/package/cpan/tests/test_cpan.py
@@ -5,6 +5,9 @@
 
 # flake8: noqa: B950
 
+import json
+from pathlib import Path
+
 import pytest
 
 from swh.loader.package import __version__
@@ -13,13 +16,16 @@
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import (
     Person,
+    RawExtrinsicMetadata,
     Release,
     Snapshot,
     SnapshotBranch,
     TargetType,
     TimestampWithTimezone,
 )
+from swh.model.model import MetadataFetcher
 from swh.model.model import ObjectType as ModelObjectType
+from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType
 
 ORIGIN_URL = "https://metacpan.org/dist/Internals-CountObjects"
 
@@ -66,6 +72,22 @@
 ]
 
 
+@pytest.fixture
+def head_release_original_artifacts_metadata():
+    return json.dumps(
+        [{k: v for k, v in ORIGIN_ARTIFACTS[0].items() if k != "version"}]
+    ).encode()
+
+
+@pytest.fixture
+def head_release_extrinsic_metadata(datadir):
+    return Path(
+        datadir,
+        "https_fastapi.metacpan.org",
+        "v1_release_JJORE_Internals-CountObjects-0.05",
+    ).read_bytes()
+
+
 @pytest.fixture
 def cpan_loader(requests_mock_datadir, swh_storage):
     return CpanLoader(
@@ -85,7 +107,11 @@
     assert cpan_loader.get_default_version() == "0.05"
 
 
-def test_cpan_loader_load_multiple_version(cpan_loader):
+def test_cpan_loader_load_multiple_version(
+    cpan_loader,
+    head_release_original_artifacts_metadata,
+    head_release_extrinsic_metadata,
+):
     load_status = cpan_loader.load()
     assert load_status["status"] == "eventful"
 
@@ -154,3 +180,30 @@
         type="cpan",
         snapshot=expected_snapshot.id,
     )
+
+    release_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=head_release.id)
+    directory_swhid = ExtendedSWHID(
+        object_type=ExtendedObjectType.DIRECTORY, object_id=head_release.target
+    )
+    expected_metadata = [
+        RawExtrinsicMetadata(
+            target=directory_swhid,
+            authority=cpan_loader.get_metadata_authority(),
+            fetcher=MetadataFetcher(
+                name="swh.loader.package.cpan.loader.CpanLoader",
+                version=__version__,
+            ),
+            discovery_date=cpan_loader.visit_date,
+            format="cpan-release-json",
+            metadata=head_release_extrinsic_metadata,
+            origin=ORIGIN_URL,
+            release=release_swhid,
+        ),
+    ]
+    assert (
+        cpan_loader.storage.raw_extrinsic_metadata_get(
+            directory_swhid,
+            cpan_loader.get_metadata_authority(),
+        ).results
+        == expected_metadata
+    )