Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123413
D8616.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Subscribers
None
D8616.diff
View Options
diff --git a/swh/loader/package/cpan/loader.py b/swh/loader/package/cpan/loader.py
--- a/swh/loader/package/cpan/loader.py
+++ b/swh/loader/package/cpan/loader.py
@@ -5,8 +5,9 @@
from datetime import datetime
import json
+import logging
from pathlib import Path
-from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
+from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
import attr
import iso8601
@@ -14,16 +15,12 @@
import yaml
from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import (
- EMPTY_AUTHOR,
- Person,
- cached_method,
- get_url_body,
- release_name,
-)
+from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
+logger = logging.getLogger(__name__)
+
@attr.s
class CpanPackageInfo(BasePackageInfo):
@@ -31,9 +28,6 @@
name = attr.ib(type=str)
"""Name of the package"""
- filename = attr.ib(type=str)
- """Archive (tar.gz) file name"""
-
version = attr.ib(type=str)
"""Current version"""
@@ -47,7 +41,7 @@
def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
"""Extract intrinsic metadata from META.json file at dir_path.
- Each Perl package version has a META.json file at the root of the archive,
+ Most Perl package versions have a META.json file at the root of the archive,
or a META.yml for older version.
See https://perldoc.perl.org/CPAN::Meta for META specifications.
@@ -59,12 +53,11 @@
A dict mapping from yaml parser
"""
meta_json_path = dir_path / "META.json"
+ meta_yml_path = dir_path / "META.yml"
metadata: Dict[str, Any] = {}
if meta_json_path.exists():
metadata = json.loads(meta_json_path.read_text())
-
- meta_yml_path = dir_path / "META.yml"
- if meta_yml_path.exists():
+ elif meta_yml_path.exists():
metadata = yaml.safe_load(meta_yml_path.read_text())
return metadata
@@ -77,23 +70,22 @@
self,
storage: StorageInterface,
url: str,
+ api_base_url: str,
+ artifacts: List[Dict[str, Any]],
+ module_metadata: List[Dict[str, Any]],
**kwargs,
):
super().__init__(storage=storage, url=url, **kwargs)
self.url = url
-
- @cached_method
- def info_versions(self) -> Dict:
- """Return the package versions (fetched from
- ``https://fastapi.metacpan.org/v1/release/versions/{pkgname}``)
-
- Api documentation https://cpan.haskell.org/api
- """
- pkgname = self.url.split("/")[-1]
- url = f"https://fastapi.metacpan.org/v1/release/versions/{pkgname}"
- data = json.loads(get_url_body(url=url, headers={"Accept": "application/json"}))
- return {release["version"]: release for release in data["releases"]}
+ self.api_base_url = api_base_url
+ self.artifacts: Dict[str, Dict] = {
+ artifact["version"]: {k: v for k, v in artifact.items() if k != "version"}
+ for artifact in artifacts
+ }
+ self.module_metadata: Dict[str, Dict] = {
+ meta["version"]: meta for meta in module_metadata
+ }
def get_versions(self) -> Sequence[str]:
"""Get all released versions of a Perl package
@@ -105,7 +97,7 @@
["0.1.1", "0.10.2"]
"""
- versions = list(self.info_versions().keys())
+ versions = list(self.artifacts.keys())
versions.sort(key=parse_version)
return versions
@@ -130,25 +122,24 @@
Returns:
Iterator of tuple (release_name, p_info)
"""
- data = self.info_versions()[version]
- pkgname: str = self.url.split("/")[-1]
- url: str = data["download_url"]
- filename: str = url.split("/")[-1]
- # The api does not provide an explicit timezone, defaults to UTC
- last_modified = iso8601.parse_date(data["date"])
-
- if "author" in data:
- author = Person.from_fullname(data["author"].encode())
- else:
- author = EMPTY_AUTHOR
+ artifact = self.artifacts[version]
+ metadata = self.module_metadata[version]
+
+ last_modified = iso8601.parse_date(metadata["date"])
+ author = (
+ Person.from_fullname(metadata["author"].encode())
+ if metadata["author"]
+ else EMPTY_AUTHOR
+ )
p_info = CpanPackageInfo(
- name=pkgname,
- filename=filename,
- url=url,
+ name=metadata["name"],
+ filename=artifact["filename"],
+ url=artifact["url"],
version=version,
last_modified=last_modified,
author=author,
+ checksums=artifact["checksums"],
)
yield release_name(version), p_info
@@ -161,11 +152,6 @@
Path(uncompressed_path) / f"{p_info.name}-{p_info.version}"
)
- name: str = intrinsic_metadata["name"]
- assert name == p_info.name
- version: str = str(intrinsic_metadata["version"])
- assert version == p_info.version
-
# author data from http endpoint are less complete than from META
if "author" in intrinsic_metadata:
author_data = intrinsic_metadata["author"]
@@ -178,11 +164,12 @@
author = p_info.author
message = (
- f"Synthetic release for Perl source package {name} version {version}\n"
+ f"Synthetic release for Perl source package {p_info.name} "
+ f"version {p_info.version}\n"
)
return Release(
- name=version.encode(),
+ name=p_info.version.encode(),
author=author,
date=TimestampWithTimezone.from_datetime(p_info.last_modified),
message=message.encode(),
diff --git a/swh/loader/package/cpan/tests/data/fake_cpan.sh b/swh/loader/package/cpan/tests/data/fake_cpan.sh
deleted file mode 100644
--- a/swh/loader/package/cpan/tests/data/fake_cpan.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env bash
-
-# Script to generate fake Perl package archives as .tar.gz.
-
-set -euo pipefail
-
-# Create directories
-readonly TMP=tmp_dir/cpan
-readonly BASE_PATH=https_cpan.metacpan.org
-
-mkdir -p $TMP
-
-# tar.gz package archives
-# Perl package tar.gz archive needs at least one directory with a META.json or META.yml file
-mkdir -p ${TMP}/Internals-CountObjects-0.01
-mkdir -p ${TMP}/Internals-CountObjects-0.05
-mkdir -p $BASE_PATH
-
-echo -e """---
-abstract: 'Report all allocated perl objects'
-author:
- - 'Josh Jore <jjore@cpan.org>'
-build_requires: {}
-configure_requires:
- ExtUtils::MakeMaker: 6.31
-dynamic_config: 0
-generated_by: 'Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400'
-license: perl
-meta-spec:
- url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: 1.4
-name: Internals-CountObjects
-version: 0.01
-""" > ${TMP}/Internals-CountObjects-0.01/META.yml
-
-echo -e '''{
- "abstract" : "Report all allocated perl objects",
- "author" : [
- "Josh Jore <jjore@cpan.org>"
- ],
- "dynamic_config" : 0,
- "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400",
- "license" : [
- "perl_5"
- ],
- "meta-spec" : {
- "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
- "version" : "2"
- },
- "name" : "Internals-CountObjects",
- "prereqs" : {
- "build" : {
- "requires" : {
- "ExtUtils::CBuilder" : 0
- }
- }
- },
- "release_status" : "stable",
- "resources" : {
- "bugtracker" : {
- "mailto" : "bug-Internals-CountObjects@rt.cpan.org",
- "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects"
- },
- "homepage" : "http://search.cpan.org/dist/Internals-CountObjects",
- "repository" : {
- "type" : "git",
- "url" : "git://github.com/jbenjore/Internals-CountObjects.git",
- "web" : "http://github.com/jbenjore/Internals-CountObjects"
- }
- },
- "version" : "0.05"
-}
-''' > ${TMP}/Internals-CountObjects-0.05/META.json
-
-cd $TMP
-
-# Tar compress
-tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.01.tar.gz Internals-CountObjects-0.01
-tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.05.tar.gz Internals-CountObjects-0.05
-
-# Move .tar.gz archives to a servable directory
-mv *.tar.gz ../../$BASE_PATH
-
-# Clean up removing tmp_dir
-cd ../../
-rm -r tmp_dir/
diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
deleted file mode 100644
--- a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "took" : 3,
- "releases" : [
- {
- "maturity" : "released",
- "authorized" : true,
- "date" : "2011-06-11T05:23:31",
- "name" : "Internals-CountObjects-0.05",
- "version" : "0.05",
- "author" : "JJORE",
- "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
- "status" : "latest"
- },
- {
- "authorized" : true,
- "date" : "2011-06-05T18:44:02",
- "maturity" : "released",
- "name" : "Internals-CountObjects-0.01",
- "version" : "0.01",
- "author" : "JJORE",
- "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
- "status" : "cpan"
- }
- ],
- "total" : 4
-}
diff --git a/swh/loader/package/cpan/tests/test_cpan.py b/swh/loader/package/cpan/tests/test_cpan.py
--- a/swh/loader/package/cpan/tests/test_cpan.py
+++ b/swh/loader/package/cpan/tests/test_cpan.py
@@ -3,11 +3,15 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+# flake8: noqa: B950
+
+import pytest
+
+from swh.loader.package import __version__
from swh.loader.package.cpan.loader import CpanLoader
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
- ObjectType,
Person,
Release,
Snapshot,
@@ -15,38 +19,80 @@
TargetType,
TimestampWithTimezone,
)
+from swh.model.model import ObjectType as ModelObjectType
+
+ORIGIN_URL = "https://metacpan.org/dist/Internals-CountObjects"
-ORIGINS = [
- "https://metacpan.org/dist/Internals-CountObjects",
+API_BASE_URL = "https://fastapi.metacpan.org/v1"
+
+ORIGIN_ARTIFACTS = [
+ {
+ "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
+ "filename": "CountObjects-0.05.tar.gz",
+ "version": "0.05",
+ "length": 632,
+ "checksums": {
+ "sha256": "e0ecf6ab4873fa55ff74da22a3c4ae0ab6a1409635c9cd2d6059abbb32be3a6a"
+ },
+ },
+ {
+ "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
+ "filename": "CountObjects-0.01.tar.gz",
+ "version": "0.01",
+ "length": 453,
+ "checksums": {
+ "sha256": "a368004ab98c5860a8fd87e0a4c44e4ee2d1b95d9b13597519a0e644c167468a"
+ },
+ },
+]
+
+ORIGIN_MODULE_METADATA = [
+ {
+ "name": "Internals-CountObjects",
+ "version": "0.05",
+ "author": "Josh Jore <jjore@cpan.org>",
+ "cpan_author": "JJORE",
+ "date": "2011-06-11T05:23:31",
+ "release_name": "Internals-CountObjects-0.05",
+ },
+ {
+ "name": "Internals-CountObjects",
+ "version": "0.01",
+ "author": "Josh Jore <jjore@cpan.org>",
+ "cpan_author": "JJORE",
+ "date": "2011-06-05T18:44:02",
+ "release_name": "Internals-CountObjects-0.01",
+ },
]
-def test_get_versions(requests_mock_datadir, swh_storage):
- loader = CpanLoader(
+@pytest.fixture
+def cpan_loader(requests_mock_datadir, swh_storage):
+ return CpanLoader(
swh_storage,
- url=ORIGINS[0],
+ url=ORIGIN_URL,
+ api_base_url=API_BASE_URL,
+ artifacts=ORIGIN_ARTIFACTS,
+ module_metadata=ORIGIN_MODULE_METADATA,
)
- assert loader.get_versions() == ["0.01", "0.05"]
-def test_get_default_version(requests_mock_datadir, swh_storage):
- loader = CpanLoader(
- swh_storage,
- url=ORIGINS[0],
- )
- assert loader.get_default_version() == "0.05"
+def test_get_versions(cpan_loader):
+ assert cpan_loader.get_versions() == ["0.01", "0.05"]
-def test_cpan_loader_load_multiple_version(datadir, requests_mock_datadir, swh_storage):
- loader = CpanLoader(
- swh_storage,
- url=ORIGINS[0],
- )
- load_status = loader.load()
+def test_get_default_version(cpan_loader):
+ assert cpan_loader.get_default_version() == "0.05"
+
+
+def test_cpan_loader_load_multiple_version(cpan_loader):
+
+ load_status = cpan_loader.load()
assert load_status["status"] == "eventful"
assert load_status["snapshot_id"] is not None
expected_snapshot_id = "848ee8d69d33481c88ab81f6794f6504190f011f"
+ expected_head_release = "07382fd255ec0fc293b92aeb7e68b3fe31c174f9"
assert expected_snapshot_id == load_status["snapshot_id"]
@@ -58,7 +104,7 @@
target_type=TargetType.RELEASE,
),
b"releases/0.05": SnapshotBranch(
- target=hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9"),
+ target=hash_to_bytes(expected_head_release),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -68,9 +114,11 @@
},
)
- check_snapshot(expected_snapshot, swh_storage)
+ storage = cpan_loader.storage
+
+ check_snapshot(expected_snapshot, storage)
- stats = get_stats(swh_storage)
+ stats = get_stats(storage)
assert {
"content": 2,
"directory": 4,
@@ -82,14 +130,13 @@
"snapshot": 1,
} == stats
- assert swh_storage.release_get(
- [hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9")]
- )[0] == Release(
+ head_release = storage.release_get([hash_to_bytes(expected_head_release)])[0]
+
+ assert head_release == Release(
name=b"0.05",
- message=b"Synthetic release for Perl source package Internals-CountObjects"
- b" version 0.05\n",
+ message=b"Synthetic release for Perl source package Internals-CountObjects version 0.05\n",
target=hash_to_bytes("af3f6a43eaf4b26dbcadb1101e8d81db6d6151e0"),
- target_type=ObjectType.DIRECTORY,
+ target_type=ModelObjectType.DIRECTORY,
synthetic=True,
author=Person(
fullname=b"Josh Jore <jjore@cpan.org>",
@@ -97,12 +144,12 @@
email=b"jjore@cpan.org",
),
date=TimestampWithTimezone.from_iso8601("2011-06-11T05:23:31+00:00"),
- id=hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9"),
+ id=hash_to_bytes(expected_head_release),
)
assert_last_visit_matches(
- swh_storage,
- url=ORIGINS[0],
+ storage,
+ url=ORIGIN_URL,
status="full",
type="cpan",
snapshot=expected_snapshot.id,
diff --git a/swh/loader/package/cpan/tests/test_tasks.py b/swh/loader/package/cpan/tests/test_tasks.py
--- a/swh/loader/package/cpan/tests/test_tasks.py
+++ b/swh/loader/package/cpan/tests/test_tasks.py
@@ -9,6 +9,13 @@
from swh.scheduler.model import ListedOrigin, Lister
+from .test_cpan import (
+ API_BASE_URL,
+ ORIGIN_ARTIFACTS,
+ ORIGIN_MODULE_METADATA,
+ ORIGIN_URL,
+)
+
NAMESPACE = "swh.loader.package.cpan"
@@ -21,8 +28,13 @@
def cpan_listed_origin(cpan_lister):
return ListedOrigin(
lister_id=cpan_lister.id,
- url="https://metacpan.org/dist/Software-Packager",
+ url=ORIGIN_URL,
visit_type="cpan",
+ extra_loader_arguments={
+ "api_base_url": API_BASE_URL,
+ "artifacts": ORIGIN_ARTIFACTS,
+ "module_metadata": ORIGIN_MODULE_METADATA,
+ },
)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 8:09 AM (11 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226384
Attached To
D8616: cpan: Align loader implementation with latest lister improvements
Event Timeline
Log In to Comment