Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F11023601
D8557.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
18 KB
Subscribers
None
D8557.diff
View Options
diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -47,6 +47,15 @@
- ""
- from extra_loader_arguments['aur_metadata']
- Intrinsic metadata extracted from .SRCINFO file of the package
+ * - cpan
+ - ``p_info.version``
+ - ``release_name(version)``
+ - =version
+ - Synthetic release for Perl source package {name} version {version} {description}
+ - true
+ - from intrinsic metadata if any else from extrinsic
+ - from extrinsic metadata
+ - name, version and description from intrinsic metadata
* - cran
- ``metadata.get("Version", passed as arg)``
- ``release_name(version)``
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -58,6 +58,7 @@
loader.arch=swh.loader.package.arch:register
loader.archive=swh.loader.package.archive:register
loader.aur=swh.loader.package.aur:register
+ loader.cpan=swh.loader.package.cpan:register
loader.cran=swh.loader.package.cran:register
loader.crates=swh.loader.package.crates:register
loader.debian=swh.loader.package.debian:register
diff --git a/swh/loader/package/cpan/__init__.py b/swh/loader/package/cpan/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+    """Return the registration entry describing the CPAN loader.
+
+    Returns:
+        A mapping with two keys: ``task_modules``, a one-element list holding
+        the dotted name of this package's ``tasks`` module, and ``loader``,
+        the ``CpanLoader`` class itself.
+    """
+    # Imported inside the function rather than at module import time.
+    from .loader import CpanLoader
+
+    tasks_module = f"{__name__}.tasks"
+    return {"task_modules": [tasks_module], "loader": CpanLoader}
diff --git a/swh/loader/package/cpan/loader.py b/swh/loader/package/cpan/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/loader.py
@@ -0,0 +1,196 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from datetime import datetime
+import json
+from pathlib import Path
+from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
+
+import attr
+import iso8601
+from packaging.version import parse as parse_version
+import yaml
+
+from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.utils import (
+ EMPTY_AUTHOR,
+ Person,
+ cached_method,
+ get_url_body,
+ release_name,
+)
+from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
+from swh.storage.interface import StorageInterface
+
+
+@attr.s
+class CpanPackageInfo(BasePackageInfo):
+    """Information about one release of a Perl package, on top of the fields
+    inherited from ``BasePackageInfo``."""
+
+    name = attr.ib(type=str)
+    """Package name"""
+
+    filename = attr.ib(type=str)
+    """File name of the release archive (tar.gz)"""
+
+    version = attr.ib(type=str)
+    """Version string of this release"""
+
+    last_modified = attr.ib(type=datetime)
+    """Last modification date of the archive file, used as release date"""
+
+    author = attr.ib(type=Person)
+    """Author of the release"""
+
+
+def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
+    """Extract intrinsic metadata from the META file found at dir_path.
+
+    Each Perl package version has a META.json file at the root of the archive,
+    or a META.yml for older versions. When both files are present, META.json
+    takes precedence and META.yml is only used as a fallback.
+
+    See https://perldoc.perl.org/CPAN::Meta for META specifications.
+
+    Args:
+        dir_path: A directory on disk where a META.json|.yml can be found
+
+    Returns:
+        The parsed content of the META file as a dict, or an empty dict when
+        no META file is found or when the file holds no data.
+    """
+    meta_json_path = dir_path / "META.json"
+    if meta_json_path.exists():
+        metadata = json.loads(meta_json_path.read_text())
+        if metadata:
+            return metadata
+
+    # Only reached when META.json is missing or empty
+    meta_yml_path = dir_path / "META.yml"
+    if meta_yml_path.exists():
+        # safe_load returns None for an empty document; always hand back a dict
+        return yaml.safe_load(meta_yml_path.read_text()) or {}
+
+    return {}
+
+
+class CpanLoader(PackageLoader[CpanPackageInfo]):
+    """Load the releases of a Perl package listed by the MetaCPAN api.
+
+    The package name is taken from the last path segment of the origin url.
+    """
+
+    visit_type = "cpan"
+
+    def __init__(
+        self,
+        storage: StorageInterface,
+        url: str,
+        **kwargs,
+    ):
+        """
+        Args:
+            storage: Storage instance forwarded to ``PackageLoader``
+            url: Origin url whose last path segment is the package name
+                (e.g. ``https://metacpan.org/dist/Internals-CountObjects``)
+            **kwargs: Extra arguments forwarded untouched to ``PackageLoader``
+        """
+        super().__init__(storage=storage, url=url, **kwargs)
+        self.url = url
+
+    def _package_name(self) -> str:
+        """Return the package name, i.e. the last path segment of the origin url"""
+        # Strip trailing slashes first so ".../dist/Foo/" still yields "Foo"
+        return self.url.rstrip("/").split("/")[-1]
+
+    @cached_method
+    def info_versions(self) -> Dict:
+        """Return the package versions (fetched from
+        ``https://fastapi.metacpan.org/v1/release/versions/{pkgname}``)
+
+        Api documentation
+        https://github.com/metacpan/metacpan-api/blob/master/docs/API-docs.md
+
+        Returns:
+            A dict mapping each version string to the release entry returned
+            by the api for that version
+        """
+        pkgname = self._package_name()
+        url = f"https://fastapi.metacpan.org/v1/release/versions/{pkgname}"
+        data = json.loads(get_url_body(url=url, headers={"Accept": "application/json"}))
+        return {release["version"]: release for release in data["releases"]}
+
+    def get_versions(self) -> Sequence[str]:
+        """Get all released versions of a Perl package
+
+        Returns:
+            A sequence of versions, sorted from oldest to newest
+
+        Example::
+
+            ["0.1.1", "0.10.2"]
+        """
+        # NOTE(review): ordering relies on packaging's version parser; confirm
+        # it accepts every version string found on CPAN.
+        return sorted(self.info_versions(), key=parse_version)
+
+    def get_default_version(self) -> str:
+        """Get the newest release version of a Perl package
+
+        Returns:
+            A string representing a version
+
+        Example::
+
+            "0.10.2"
+        """
+        return self.get_versions()[-1]
+
+    def get_package_info(self, version: str) -> Iterator[Tuple[str, CpanPackageInfo]]:
+        """Get release name and package information from version
+
+        Args:
+            version: Package version (e.g: "0.1.0")
+
+        Returns:
+            Iterator of tuple (release_name, p_info)
+        """
+        data = self.info_versions()[version]
+        url: str = data["download_url"]
+        filename: str = url.split("/")[-1]
+        # The api does not provide an explicit timezone, defaults to UTC
+        last_modified = iso8601.parse_date(data["date"])
+
+        if "author" in data:
+            author = Person.from_fullname(data["author"].encode())
+        else:
+            author = EMPTY_AUTHOR
+
+        p_info = CpanPackageInfo(
+            name=self._package_name(),
+            filename=filename,
+            url=url,
+            version=version,
+            last_modified=last_modified,
+            author=author,
+        )
+        yield release_name(version), p_info
+
+    def build_release(
+        self, p_info: CpanPackageInfo, uncompressed_path: str, directory: Sha1Git
+    ) -> Optional[Release]:
+        """Build the synthetic release for one package version.
+
+        Args:
+            p_info: Package information yielded by ``get_package_info``
+            uncompressed_path: Directory where the archive was extracted; the
+                META file is looked up in its ``{name}-{version}`` subdirectory
+            directory: Identifier of the directory targeted by the release
+
+        Returns:
+            A synthetic release targeting ``directory``
+
+        Raises:
+            KeyError: if the intrinsic metadata lacks ``name``, ``version`` or
+                ``abstract``
+            AssertionError: if the intrinsic name or version differs from the
+                one in ``p_info``
+        """
+        # Extract intrinsic metadata from uncompressed_path/META.json|.yml
+        intrinsic_metadata = extract_intrinsic_metadata(
+            Path(uncompressed_path) / f"{p_info.name}-{p_info.version}"
+        )
+
+        # Sanity checks: intrinsic metadata must describe the requested release
+        name: str = intrinsic_metadata["name"]
+        assert name == p_info.name
+        # A META.yml parser may hand back a number here, hence the str()
+        version: str = str(intrinsic_metadata["version"])
+        assert version == p_info.version
+
+        description = intrinsic_metadata["abstract"]
+
+        # author data from http endpoint are less complete than from META
+        author = p_info.author
+        author_data = intrinsic_metadata.get("author")
+        if isinstance(author_data, list):
+            # Only the first listed author is kept; an empty list means no author
+            author_data = author_data[0] if author_data else None
+        if author_data:
+            author = Person.from_fullname(author_data.encode())
+
+        message = (
+            f"Synthetic release for Perl source package {name} "
+            f"version {version}\n\n"
+            f"{description}\n"
+        )
+
+        return Release(
+            name=version.encode(),
+            author=author,
+            date=TimestampWithTimezone.from_datetime(p_info.last_modified),
+            message=message.encode(),
+            target_type=ObjectType.DIRECTORY,
+            target=directory,
+            synthetic=True,
+        )
diff --git a/swh/loader/package/cpan/tasks.py b/swh/loader/package/cpan/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/tasks.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.cpan.loader import CpanLoader
+
+
+@shared_task(name=__name__ + ".LoadCpan")
+def load_cpan(**kwargs):
+    """Celery task loading a package from Cpan (The Comprehensive Perl Archive
+    Network).
+
+    All keyword arguments are forwarded to ``CpanLoader.from_configfile``; the
+    value returned by the loader's ``load()`` is the task result.
+    """
+    loader = CpanLoader.from_configfile(**kwargs)
+    return loader.load()
diff --git a/swh/loader/package/cpan/tests/__init__.py b/swh/loader/package/cpan/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/package/cpan/tests/data/fake_cpan.sh b/swh/loader/package/cpan/tests/data/fake_cpan.sh
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/tests/data/fake_cpan.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+# Script to generate fake Perl package archives as .tar.gz.
+#
+# Two archives of the "Internals-CountObjects" package are produced, one per
+# version (0.01 and 0.05), and moved into the $BASE_PATH directory next to
+# where the script is run.
+
+# Abort on the first failing command, on use of an unset variable, and when
+# any stage of a pipeline fails.
+set -euo pipefail
+
+# Create directories
+readonly TMP=tmp_dir/cpan
+readonly BASE_PATH=https_cpan.metacpan.org
+
+mkdir -p $TMP
+
+# tar.gz package archives
+# Perl package tar.gz archive needs at least one directory with a META.json or META.yml file
+mkdir -p ${TMP}/Internals-CountObjects-0.01
+mkdir -p ${TMP}/Internals-CountObjects-0.05
+mkdir -p $BASE_PATH
+
+# Version 0.01 only ships the older META.yml format (meta-spec version 1.4)
+echo -e """---
+abstract: 'Report all allocated perl objects'
+author:
+ - 'Josh Jore <jjore@cpan.org>'
+build_requires: {}
+configure_requires:
+ ExtUtils::MakeMaker: 6.31
+dynamic_config: 0
+generated_by: 'Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+name: Internals-CountObjects
+version: 0.01
+""" > ${TMP}/Internals-CountObjects-0.01/META.yml
+
+# Version 0.05 ships a META.json file (meta-spec version 2)
+echo -e '''{
+ "abstract" : "Report all allocated perl objects",
+ "author" : [
+ "Josh Jore <jjore@cpan.org>"
+ ],
+ "dynamic_config" : 0,
+ "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400",
+ "license" : [
+ "perl_5"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : "2"
+ },
+ "name" : "Internals-CountObjects",
+ "prereqs" : {
+ "build" : {
+ "requires" : {
+ "ExtUtils::CBuilder" : 0
+ }
+ }
+ },
+ "release_status" : "stable",
+ "resources" : {
+ "bugtracker" : {
+ "mailto" : "bug-Internals-CountObjects@rt.cpan.org",
+ "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects"
+ },
+ "homepage" : "http://search.cpan.org/dist/Internals-CountObjects",
+ "repository" : {
+ "type" : "git",
+ "url" : "git://github.com/jbenjore/Internals-CountObjects.git",
+ "web" : "http://github.com/jbenjore/Internals-CountObjects"
+ }
+ },
+ "version" : "0.05"
+}
+''' > ${TMP}/Internals-CountObjects-0.05/META.json
+
+cd $TMP
+
+# Tar compress
+# Archive names mirror the path of the package download url with "/" replaced
+# by "_" (presumably the naming expected by the tests' HTTP mock - confirm).
+tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.01.tar.gz Internals-CountObjects-0.01
+tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.05.tar.gz Internals-CountObjects-0.05
+
+# Move .tar.gz archives to a servable directory
+mv *.tar.gz ../../$BASE_PATH
+
+# Clean up removing tmp_dir
+cd ../../
+rm -r tmp_dir/
diff --git a/swh/loader/package/cpan/tests/data/https_cpan.metacpan.org/authors_id_J_JJ_JJORE_Internals-CountObjects-0.01.tar.gz b/swh/loader/package/cpan/tests/data/https_cpan.metacpan.org/authors_id_J_JJ_JJORE_Internals-CountObjects-0.01.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/cpan/tests/data/https_cpan.metacpan.org/authors_id_J_JJ_JJORE_Internals-CountObjects-0.05.tar.gz b/swh/loader/package/cpan/tests/data/https_cpan.metacpan.org/authors_id_J_JJ_JJORE_Internals-CountObjects-0.05.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
@@ -0,0 +1,26 @@
+{
+ "took" : 3,
+ "releases" : [
+ {
+ "maturity" : "released",
+ "authorized" : true,
+ "date" : "2011-06-11T05:23:31",
+ "name" : "Internals-CountObjects-0.05",
+ "version" : "0.05",
+ "author" : "JJORE",
+ "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
+ "status" : "latest"
+ },
+ {
+ "authorized" : true,
+ "date" : "2011-06-05T18:44:02",
+ "maturity" : "released",
+ "name" : "Internals-CountObjects-0.01",
+ "version" : "0.01",
+ "author" : "JJORE",
+ "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
+ "status" : "cpan"
+ }
+ ],
+ "total" : 4
+}
diff --git a/swh/loader/package/cpan/tests/test_cpan.py b/swh/loader/package/cpan/tests/test_cpan.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/tests/test_cpan.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.loader.package.cpan.loader import CpanLoader
+from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
+from swh.model.hashutil import hash_to_bytes
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ TimestampWithTimezone,
+)
+
+# Origin urls used by the tests below; the last path segment is the package
+# name the loader looks up.
+ORIGINS = [
+ "https://metacpan.org/dist/Internals-CountObjects",
+]
+
+
+def test_get_versions(requests_mock_datadir, swh_storage):
+    """Versions listed for the fixture package come back oldest first."""
+    versions = CpanLoader(swh_storage, url=ORIGINS[0]).get_versions()
+    assert versions == ["0.01", "0.05"]
+
+
+def test_get_default_version(requests_mock_datadir, swh_storage):
+    """The default version of the fixture package is its newest release."""
+    default_version = CpanLoader(swh_storage, url=ORIGINS[0]).get_default_version()
+    assert default_version == "0.05"
+
+
+def test_cpan_loader_load_multiple_version(datadir, requests_mock_datadir, swh_storage):
+    """Load both fixture releases and check the snapshot, the object counts,
+    the 0.05 release and the visit recorded in storage."""
+    release_0_01_id = hash_to_bytes("3b31ce005c364de6c1b8caca8bf12487d5debf38")
+    release_0_05_id = hash_to_bytes("2901106d99de31f71380b6c3b5e92799ce3a1a5e")
+
+    result = CpanLoader(swh_storage, url=ORIGINS[0]).load()
+    assert result["status"] == "eventful"
+    assert result["snapshot_id"] is not None
+    assert result["snapshot_id"] == "2b1f606033ef5ccfed78aeb94baf5a8b901b2306"
+
+    # One branch per release, plus HEAD aliasing the newest one
+    branches = {
+        b"releases/0.01": SnapshotBranch(
+            target=release_0_01_id,
+            target_type=TargetType.RELEASE,
+        ),
+        b"releases/0.05": SnapshotBranch(
+            target=release_0_05_id,
+            target_type=TargetType.RELEASE,
+        ),
+        b"HEAD": SnapshotBranch(
+            target=b"releases/0.05",
+            target_type=TargetType.ALIAS,
+        ),
+    }
+    expected_snapshot = Snapshot(
+        id=hash_to_bytes(result["snapshot_id"]),
+        branches=branches,
+    )
+    check_snapshot(expected_snapshot, swh_storage)
+
+    expected_stats = {
+        "content": 2,
+        "directory": 4,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 2,
+        "revision": 0,
+        "skipped_content": 0,
+        "snapshot": 1,
+    }
+    assert expected_stats == get_stats(swh_storage)
+
+    expected_release = Release(
+        name=b"0.05",
+        message=b"Synthetic release for Perl source package Internals-CountObjects"
+        b" version 0.05\n\nReport all allocated perl objects\n",
+        target=hash_to_bytes("af3f6a43eaf4b26dbcadb1101e8d81db6d6151e0"),
+        target_type=ObjectType.DIRECTORY,
+        synthetic=True,
+        author=Person(
+            fullname=b"Josh Jore <jjore@cpan.org>",
+            name=b"Josh Jore",
+            email=b"jjore@cpan.org",
+        ),
+        date=TimestampWithTimezone.from_iso8601("2011-06-11T05:23:31+00:00"),
+        id=release_0_05_id,
+    )
+    stored_release = swh_storage.release_get([release_0_05_id])[0]
+    assert stored_release == expected_release
+
+    assert_last_visit_matches(
+        swh_storage,
+        url=ORIGINS[0],
+        status="full",
+        type="cpan",
+        snapshot=expected_snapshot.id,
+    )
diff --git a/swh/loader/package/cpan/tests/test_tasks.py b/swh/loader/package/cpan/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/cpan/tests/test_tasks.py
@@ -0,0 +1,23 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_tasks_cpan_loader(
+    mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+):
+    """Sending the LoadCpan task calls ``CpanLoader.load`` and returns its result."""
+    mock_load = mocker.patch("swh.loader.package.cpan.loader.CpanLoader.load")
+    mock_load.return_value = {"status": "eventful"}
+
+    task_kwargs = {"url": "some-url/api/packages/some-package"}
+    res = swh_scheduler_celery_app.send_task(
+        "swh.loader.package.cpan.tasks.LoadCpan",
+        kwargs=task_kwargs,
+    )
+    assert res
+
+    # Block until the worker has processed the task
+    res.wait()
+    assert res.successful()
+    assert mock_load.called
+    assert res.result == {"status": "eventful"}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Sep 17, 4:51 PM (5 h, 5 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220351
Attached To
D8557: Cpan: Cpan loader loads Perl modules from cpan.org
Event Timeline
Log In to Comment