Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9348291
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Subscribers
None
View Options
diff --git a/swh/loader/package/cpan/loader.py b/swh/loader/package/cpan/loader.py
index 830362e..b9d7fab 100644
--- a/swh/loader/package/cpan/loader.py
+++ b/swh/loader/package/cpan/loader.py
@@ -1,192 +1,179 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime
import json
+import logging
from pathlib import Path
-from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
+from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
import attr
import iso8601
from packaging.version import parse as parse_version
import yaml
from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import (
- EMPTY_AUTHOR,
- Person,
- cached_method,
- get_url_body,
- release_name,
-)
+from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
+logger = logging.getLogger(__name__)
+
@attr.s
class CpanPackageInfo(BasePackageInfo):
name = attr.ib(type=str)
"""Name of the package"""
- filename = attr.ib(type=str)
- """Archive (tar.gz) file name"""
-
version = attr.ib(type=str)
"""Current version"""
last_modified = attr.ib(type=datetime)
"""File last modified date as release date."""
author = attr.ib(type=Person)
"""Author"""
def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
"""Extract intrinsic metadata from META.json file at dir_path.
- Each Perl package version has a META.json file at the root of the archive,
+ Most Perl package versions have a META.json file at the root of the archive,
or a META.yml for older version.
See https://perldoc.perl.org/CPAN::Meta for META specifications.
Args:
dir_path: A directory on disk where a META.json|.yml can be found
Returns:
A dict mapping from yaml parser
"""
meta_json_path = dir_path / "META.json"
+ meta_yml_path = dir_path / "META.yml"
metadata: Dict[str, Any] = {}
if meta_json_path.exists():
metadata = json.loads(meta_json_path.read_text())
-
- meta_yml_path = dir_path / "META.yml"
- if meta_yml_path.exists():
+ elif meta_yml_path.exists():
metadata = yaml.safe_load(meta_yml_path.read_text())
return metadata
class CpanLoader(PackageLoader[CpanPackageInfo]):
visit_type = "cpan"
def __init__(
self,
storage: StorageInterface,
url: str,
+ api_base_url: str,
+ artifacts: List[Dict[str, Any]],
+ module_metadata: List[Dict[str, Any]],
**kwargs,
):
super().__init__(storage=storage, url=url, **kwargs)
self.url = url
-
- @cached_method
- def info_versions(self) -> Dict:
- """Return the package versions (fetched from
- ``https://fastapi.metacpan.org/v1/release/versions/{pkgname}``)
-
- Api documentation https://cpan.haskell.org/api
- """
- pkgname = self.url.split("/")[-1]
- url = f"https://fastapi.metacpan.org/v1/release/versions/{pkgname}"
- data = json.loads(get_url_body(url=url, headers={"Accept": "application/json"}))
- return {release["version"]: release for release in data["releases"]}
+ self.api_base_url = api_base_url
+ self.artifacts: Dict[str, Dict] = {
+ artifact["version"]: {k: v for k, v in artifact.items() if k != "version"}
+ for artifact in artifacts
+ }
+ self.module_metadata: Dict[str, Dict] = {
+ meta["version"]: meta for meta in module_metadata
+ }
def get_versions(self) -> Sequence[str]:
"""Get all released versions of a Perl package
Returns:
A sequence of versions
Example::
["0.1.1", "0.10.2"]
"""
- versions = list(self.info_versions().keys())
+ versions = list(self.artifacts.keys())
versions.sort(key=parse_version)
return versions
def get_default_version(self) -> str:
"""Get the newest release version of a Perl package
Returns:
A string representing a version
Example::
"0.10.2"
"""
return self.get_versions()[-1]
def get_package_info(self, version: str) -> Iterator[Tuple[str, CpanPackageInfo]]:
"""Get release name and package information from version
Args:
version: Package version (e.g: "0.1.0")
Returns:
Iterator of tuple (release_name, p_info)
"""
- data = self.info_versions()[version]
- pkgname: str = self.url.split("/")[-1]
- url: str = data["download_url"]
- filename: str = url.split("/")[-1]
- # The api does not provide an explicit timezone, defaults to UTC
- last_modified = iso8601.parse_date(data["date"])
-
- if "author" in data:
- author = Person.from_fullname(data["author"].encode())
- else:
- author = EMPTY_AUTHOR
+ artifact = self.artifacts[version]
+ metadata = self.module_metadata[version]
+
+ last_modified = iso8601.parse_date(metadata["date"])
+ author = (
+ Person.from_fullname(metadata["author"].encode())
+ if metadata["author"]
+ else EMPTY_AUTHOR
+ )
p_info = CpanPackageInfo(
- name=pkgname,
- filename=filename,
- url=url,
+ name=metadata["name"],
+ filename=artifact["filename"],
+ url=artifact["url"],
version=version,
last_modified=last_modified,
author=author,
+ checksums=artifact["checksums"],
)
yield release_name(version), p_info
def build_release(
self, p_info: CpanPackageInfo, uncompressed_path: str, directory: Sha1Git
) -> Optional[Release]:
# Extract intrinsic metadata from uncompressed_path/META.json|.yml
intrinsic_metadata = extract_intrinsic_metadata(
Path(uncompressed_path) / f"{p_info.name}-{p_info.version}"
)
- name: str = intrinsic_metadata["name"]
- assert name == p_info.name
- version: str = str(intrinsic_metadata["version"])
- assert version == p_info.version
-
# author data from http endpoint are less complete than from META
if "author" in intrinsic_metadata:
author_data = intrinsic_metadata["author"]
if type(author_data) is list:
author = author_data[0]
else:
author = author_data
author = Person.from_fullname(author.encode())
else:
author = p_info.author
message = (
- f"Synthetic release for Perl source package {name} version {version}\n"
+ f"Synthetic release for Perl source package {p_info.name} "
+ f"version {p_info.version}\n"
)
return Release(
- name=version.encode(),
+ name=p_info.version.encode(),
author=author,
date=TimestampWithTimezone.from_datetime(p_info.last_modified),
message=message.encode(),
target_type=ObjectType.DIRECTORY,
target=directory,
synthetic=True,
)
diff --git a/swh/loader/package/cpan/tests/data/fake_cpan.sh b/swh/loader/package/cpan/tests/data/fake_cpan.sh
deleted file mode 100644
index 5a23d47..0000000
--- a/swh/loader/package/cpan/tests/data/fake_cpan.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env bash
-
-# Script to generate fake Perl package archives as .tar.gz.
-
-set -euo pipefail
-
-# Create directories
-readonly TMP=tmp_dir/cpan
-readonly BASE_PATH=https_cpan.metacpan.org
-
-mkdir -p $TMP
-
-# tar.gz package archives
-# Perl package tar.gz archive needs at least one directory with a META.json or META.yml file
-mkdir -p ${TMP}/Internals-CountObjects-0.01
-mkdir -p ${TMP}/Internals-CountObjects-0.05
-mkdir -p $BASE_PATH
-
-echo -e """---
-abstract: 'Report all allocated perl objects'
-author:
- - 'Josh Jore <jjore@cpan.org>'
-build_requires: {}
-configure_requires:
- ExtUtils::MakeMaker: 6.31
-dynamic_config: 0
-generated_by: 'Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400'
-license: perl
-meta-spec:
- url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: 1.4
-name: Internals-CountObjects
-version: 0.01
-""" > ${TMP}/Internals-CountObjects-0.01/META.yml
-
-echo -e '''{
- "abstract" : "Report all allocated perl objects",
- "author" : [
- "Josh Jore <jjore@cpan.org>"
- ],
- "dynamic_config" : 0,
- "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400",
- "license" : [
- "perl_5"
- ],
- "meta-spec" : {
- "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
- "version" : "2"
- },
- "name" : "Internals-CountObjects",
- "prereqs" : {
- "build" : {
- "requires" : {
- "ExtUtils::CBuilder" : 0
- }
- }
- },
- "release_status" : "stable",
- "resources" : {
- "bugtracker" : {
- "mailto" : "bug-Internals-CountObjects@rt.cpan.org",
- "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects"
- },
- "homepage" : "http://search.cpan.org/dist/Internals-CountObjects",
- "repository" : {
- "type" : "git",
- "url" : "git://github.com/jbenjore/Internals-CountObjects.git",
- "web" : "http://github.com/jbenjore/Internals-CountObjects"
- }
- },
- "version" : "0.05"
-}
-''' > ${TMP}/Internals-CountObjects-0.05/META.json
-
-cd $TMP
-
-# Tar compress
-tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.01.tar.gz Internals-CountObjects-0.01
-tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.05.tar.gz Internals-CountObjects-0.05
-
-# Move .tar.gz archives to a servable directory
-mv *.tar.gz ../../$BASE_PATH
-
-# Clean up removing tmp_dir
-cd ../../
-rm -r tmp_dir/
diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
deleted file mode 100644
index df9422b..0000000
--- a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "took" : 3,
- "releases" : [
- {
- "maturity" : "released",
- "authorized" : true,
- "date" : "2011-06-11T05:23:31",
- "name" : "Internals-CountObjects-0.05",
- "version" : "0.05",
- "author" : "JJORE",
- "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
- "status" : "latest"
- },
- {
- "authorized" : true,
- "date" : "2011-06-05T18:44:02",
- "maturity" : "released",
- "name" : "Internals-CountObjects-0.01",
- "version" : "0.01",
- "author" : "JJORE",
- "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
- "status" : "cpan"
- }
- ],
- "total" : 4
-}
diff --git a/swh/loader/package/cpan/tests/test_cpan.py b/swh/loader/package/cpan/tests/test_cpan.py
index 7904fe7..afd809f 100644
--- a/swh/loader/package/cpan/tests/test_cpan.py
+++ b/swh/loader/package/cpan/tests/test_cpan.py
@@ -1,109 +1,156 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+# flake8: noqa: B950
+
+import pytest
+
+from swh.loader.package import __version__
from swh.loader.package.cpan.loader import CpanLoader
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
- ObjectType,
Person,
Release,
Snapshot,
SnapshotBranch,
TargetType,
TimestampWithTimezone,
)
+from swh.model.model import ObjectType as ModelObjectType
+
+ORIGIN_URL = "https://metacpan.org/dist/Internals-CountObjects"
-ORIGINS = [
- "https://metacpan.org/dist/Internals-CountObjects",
+API_BASE_URL = "https://fastapi.metacpan.org/v1"
+
+ORIGIN_ARTIFACTS = [
+ {
+ "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
+ "filename": "CountObjects-0.05.tar.gz",
+ "version": "0.05",
+ "length": 632,
+ "checksums": {
+ "sha256": "e0ecf6ab4873fa55ff74da22a3c4ae0ab6a1409635c9cd2d6059abbb32be3a6a"
+ },
+ },
+ {
+ "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
+ "filename": "CountObjects-0.01.tar.gz",
+ "version": "0.01",
+ "length": 453,
+ "checksums": {
+ "sha256": "a368004ab98c5860a8fd87e0a4c44e4ee2d1b95d9b13597519a0e644c167468a"
+ },
+ },
+]
+
+ORIGIN_MODULE_METADATA = [
+ {
+ "name": "Internals-CountObjects",
+ "version": "0.05",
+ "author": "Josh Jore <jjore@cpan.org>",
+ "cpan_author": "JJORE",
+ "date": "2011-06-11T05:23:31",
+ "release_name": "Internals-CountObjects-0.05",
+ },
+ {
+ "name": "Internals-CountObjects",
+ "version": "0.01",
+ "author": "Josh Jore <jjore@cpan.org>",
+ "cpan_author": "JJORE",
+ "date": "2011-06-05T18:44:02",
+ "release_name": "Internals-CountObjects-0.01",
+ },
]
-def test_get_versions(requests_mock_datadir, swh_storage):
- loader = CpanLoader(
+@pytest.fixture
+def cpan_loader(requests_mock_datadir, swh_storage):
+ return CpanLoader(
swh_storage,
- url=ORIGINS[0],
+ url=ORIGIN_URL,
+ api_base_url=API_BASE_URL,
+ artifacts=ORIGIN_ARTIFACTS,
+ module_metadata=ORIGIN_MODULE_METADATA,
)
- assert loader.get_versions() == ["0.01", "0.05"]
-def test_get_default_version(requests_mock_datadir, swh_storage):
- loader = CpanLoader(
- swh_storage,
- url=ORIGINS[0],
- )
- assert loader.get_default_version() == "0.05"
+def test_get_versions(cpan_loader):
+ assert cpan_loader.get_versions() == ["0.01", "0.05"]
-def test_cpan_loader_load_multiple_version(datadir, requests_mock_datadir, swh_storage):
- loader = CpanLoader(
- swh_storage,
- url=ORIGINS[0],
- )
- load_status = loader.load()
+def test_get_default_version(cpan_loader):
+ assert cpan_loader.get_default_version() == "0.05"
+
+
+def test_cpan_loader_load_multiple_version(cpan_loader):
+
+ load_status = cpan_loader.load()
assert load_status["status"] == "eventful"
assert load_status["snapshot_id"] is not None
expected_snapshot_id = "848ee8d69d33481c88ab81f6794f6504190f011f"
+ expected_head_release = "07382fd255ec0fc293b92aeb7e68b3fe31c174f9"
assert expected_snapshot_id == load_status["snapshot_id"]
expected_snapshot = Snapshot(
id=hash_to_bytes(load_status["snapshot_id"]),
branches={
b"releases/0.01": SnapshotBranch(
target=hash_to_bytes("e73aced4cc3d56b32a328d3248b25b052f029df4"),
target_type=TargetType.RELEASE,
),
b"releases/0.05": SnapshotBranch(
- target=hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9"),
+ target=hash_to_bytes(expected_head_release),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
target=b"releases/0.05",
target_type=TargetType.ALIAS,
),
},
)
- check_snapshot(expected_snapshot, swh_storage)
+ storage = cpan_loader.storage
+
+ check_snapshot(expected_snapshot, storage)
- stats = get_stats(swh_storage)
+ stats = get_stats(storage)
assert {
"content": 2,
"directory": 4,
"origin": 1,
"origin_visit": 1,
"release": 2,
"revision": 0,
"skipped_content": 0,
"snapshot": 1,
} == stats
- assert swh_storage.release_get(
- [hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9")]
- )[0] == Release(
+ head_release = storage.release_get([hash_to_bytes(expected_head_release)])[0]
+
+ assert head_release == Release(
name=b"0.05",
- message=b"Synthetic release for Perl source package Internals-CountObjects"
- b" version 0.05\n",
+ message=b"Synthetic release for Perl source package Internals-CountObjects version 0.05\n",
target=hash_to_bytes("af3f6a43eaf4b26dbcadb1101e8d81db6d6151e0"),
- target_type=ObjectType.DIRECTORY,
+ target_type=ModelObjectType.DIRECTORY,
synthetic=True,
author=Person(
fullname=b"Josh Jore <jjore@cpan.org>",
name=b"Josh Jore",
email=b"jjore@cpan.org",
),
date=TimestampWithTimezone.from_iso8601("2011-06-11T05:23:31+00:00"),
- id=hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9"),
+ id=hash_to_bytes(expected_head_release),
)
assert_last_visit_matches(
- swh_storage,
- url=ORIGINS[0],
+ storage,
+ url=ORIGIN_URL,
status="full",
type="cpan",
snapshot=expected_snapshot.id,
)
diff --git a/swh/loader/package/cpan/tests/test_tasks.py b/swh/loader/package/cpan/tests/test_tasks.py
index dc8cf1e..55cfa31 100644
--- a/swh/loader/package/cpan/tests/test_tasks.py
+++ b/swh/loader/package/cpan/tests/test_tasks.py
@@ -1,40 +1,52 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import uuid
import pytest
from swh.scheduler.model import ListedOrigin, Lister
+from .test_cpan import (
+ API_BASE_URL,
+ ORIGIN_ARTIFACTS,
+ ORIGIN_MODULE_METADATA,
+ ORIGIN_URL,
+)
+
NAMESPACE = "swh.loader.package.cpan"
@pytest.fixture
def cpan_lister():
return Lister(name="cpan", instance_name="example", id=uuid.uuid4())
@pytest.fixture
def cpan_listed_origin(cpan_lister):
return ListedOrigin(
lister_id=cpan_lister.id,
- url="https://metacpan.org/dist/Software-Packager",
+ url=ORIGIN_URL,
visit_type="cpan",
+ extra_loader_arguments={
+ "api_base_url": API_BASE_URL,
+ "artifacts": ORIGIN_ARTIFACTS,
+ "module_metadata": ORIGIN_MODULE_METADATA,
+ },
)
def test_cpan_loader_task_for_listed_origin(
loading_task_creation_for_listed_origin_test,
cpan_lister,
cpan_listed_origin,
):
loading_task_creation_for_listed_origin_test(
loader_class_name=f"{NAMESPACE}.loader.CpanLoader",
task_function_name=f"{NAMESPACE}.tasks.LoadCpan",
lister=cpan_lister,
listed_origin=cpan_listed_origin,
)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 6:22 PM (5 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3266102
Attached To
rDLDBASE Generic VCS/Package Loader
Event Timeline
Log In to Comment