Page MenuHomeSoftware Heritage

D8616.id31294.diff
No OneTemporary

D8616.id31294.diff

diff --git a/swh/loader/package/cpan/loader.py b/swh/loader/package/cpan/loader.py
--- a/swh/loader/package/cpan/loader.py
+++ b/swh/loader/package/cpan/loader.py
@@ -5,8 +5,9 @@
from datetime import datetime
import json
+import logging
from pathlib import Path
-from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
+from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
import attr
import iso8601
@@ -14,16 +15,12 @@
import yaml
from swh.loader.package.loader import BasePackageInfo, PackageLoader
-from swh.loader.package.utils import (
- EMPTY_AUTHOR,
- Person,
- cached_method,
- get_url_body,
- release_name,
-)
+from swh.loader.package.utils import EMPTY_AUTHOR, Person, release_name
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
+logger = logging.getLogger(__name__)
+
@attr.s
class CpanPackageInfo(BasePackageInfo):
@@ -31,9 +28,6 @@
name = attr.ib(type=str)
"""Name of the package"""
- filename = attr.ib(type=str)
- """Archive (tar.gz) file name"""
-
version = attr.ib(type=str)
"""Current version"""
@@ -47,7 +41,7 @@
def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
"""Extract intrinsic metadata from META.json file at dir_path.
- Each Perl package version has a META.json file at the root of the archive,
+ Most Perl package versions have a META.json file at the root of the archive,
or a META.yml for older version.
See https://perldoc.perl.org/CPAN::Meta for META specifications.
@@ -59,12 +53,11 @@
A dict mapping from yaml parser
"""
meta_json_path = dir_path / "META.json"
+ meta_yml_path = dir_path / "META.yml"
metadata: Dict[str, Any] = {}
if meta_json_path.exists():
metadata = json.loads(meta_json_path.read_text())
-
- meta_yml_path = dir_path / "META.yml"
- if meta_yml_path.exists():
+ elif meta_yml_path.exists():
metadata = yaml.safe_load(meta_yml_path.read_text())
return metadata
@@ -77,23 +70,22 @@
self,
storage: StorageInterface,
url: str,
+ api_base_url: str,
+ artifacts: List[Dict[str, Any]],
+ module_metadata: List[Dict[str, Any]],
**kwargs,
):
super().__init__(storage=storage, url=url, **kwargs)
self.url = url
-
- @cached_method
- def info_versions(self) -> Dict:
- """Return the package versions (fetched from
- ``https://fastapi.metacpan.org/v1/release/versions/{pkgname}``)
-
- Api documentation https://cpan.haskell.org/api
- """
- pkgname = self.url.split("/")[-1]
- url = f"https://fastapi.metacpan.org/v1/release/versions/{pkgname}"
- data = json.loads(get_url_body(url=url, headers={"Accept": "application/json"}))
- return {release["version"]: release for release in data["releases"]}
+ self.api_base_url = api_base_url
+ self.artifacts: Dict[str, Dict] = {
+ artifact["version"]: {k: v for k, v in artifact.items() if k != "version"}
+ for artifact in artifacts
+ }
+ self.module_metadata: Dict[str, Dict] = {
+ meta["version"]: meta for meta in module_metadata
+ }
def get_versions(self) -> Sequence[str]:
"""Get all released versions of a Perl package
@@ -105,7 +97,7 @@
["0.1.1", "0.10.2"]
"""
- versions = list(self.info_versions().keys())
+ versions = list(self.artifacts.keys())
versions.sort(key=parse_version)
return versions
@@ -130,25 +122,24 @@
Returns:
Iterator of tuple (release_name, p_info)
"""
- data = self.info_versions()[version]
- pkgname: str = self.url.split("/")[-1]
- url: str = data["download_url"]
- filename: str = url.split("/")[-1]
- # The api does not provide an explicit timezone, defaults to UTC
- last_modified = iso8601.parse_date(data["date"])
-
- if "author" in data:
- author = Person.from_fullname(data["author"].encode())
- else:
- author = EMPTY_AUTHOR
+ artifact = self.artifacts[version]
+ metadata = self.module_metadata[version]
+
+ last_modified = iso8601.parse_date(metadata["date"])
+ author = (
+ Person.from_fullname(metadata["author"].encode())
+ if metadata["author"]
+ else EMPTY_AUTHOR
+ )
p_info = CpanPackageInfo(
- name=pkgname,
- filename=filename,
- url=url,
+ name=metadata["name"],
+ filename=artifact["filename"],
+ url=artifact["url"],
version=version,
last_modified=last_modified,
author=author,
+ checksums=artifact["checksums"],
)
yield release_name(version), p_info
@@ -161,11 +152,6 @@
Path(uncompressed_path) / f"{p_info.name}-{p_info.version}"
)
- name: str = intrinsic_metadata["name"]
- assert name == p_info.name
- version: str = str(intrinsic_metadata["version"])
- assert version == p_info.version
-
# author data from http endpoint are less complete than from META
if "author" in intrinsic_metadata:
author_data = intrinsic_metadata["author"]
@@ -178,11 +164,12 @@
author = p_info.author
message = (
- f"Synthetic release for Perl source package {name} version {version}\n"
+ f"Synthetic release for Perl source package {p_info.name} "
+ f"version {p_info.version}\n"
)
return Release(
- name=version.encode(),
+ name=p_info.version.encode(),
author=author,
date=TimestampWithTimezone.from_datetime(p_info.last_modified),
message=message.encode(),
diff --git a/swh/loader/package/cpan/tests/data/fake_cpan.sh b/swh/loader/package/cpan/tests/data/fake_cpan.sh
deleted file mode 100644
--- a/swh/loader/package/cpan/tests/data/fake_cpan.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env bash
-
-# Script to generate fake Perl package archives as .tar.gz.
-
-set -euo pipefail
-
-# Create directories
-readonly TMP=tmp_dir/cpan
-readonly BASE_PATH=https_cpan.metacpan.org
-
-mkdir -p $TMP
-
-# tar.gz package archives
-# Perl package tar.gz archive needs at least one directory with a META.json or META.yml file
-mkdir -p ${TMP}/Internals-CountObjects-0.01
-mkdir -p ${TMP}/Internals-CountObjects-0.05
-mkdir -p $BASE_PATH
-
-echo -e """---
-abstract: 'Report all allocated perl objects'
-author:
- - 'Josh Jore <jjore@cpan.org>'
-build_requires: {}
-configure_requires:
- ExtUtils::MakeMaker: 6.31
-dynamic_config: 0
-generated_by: 'Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400'
-license: perl
-meta-spec:
- url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: 1.4
-name: Internals-CountObjects
-version: 0.01
-""" > ${TMP}/Internals-CountObjects-0.01/META.yml
-
-echo -e '''{
- "abstract" : "Report all allocated perl objects",
- "author" : [
- "Josh Jore <jjore@cpan.org>"
- ],
- "dynamic_config" : 0,
- "generated_by" : "Dist::Zilla version 4.200000, CPAN::Meta::Converter version 2.102400",
- "license" : [
- "perl_5"
- ],
- "meta-spec" : {
- "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
- "version" : "2"
- },
- "name" : "Internals-CountObjects",
- "prereqs" : {
- "build" : {
- "requires" : {
- "ExtUtils::CBuilder" : 0
- }
- }
- },
- "release_status" : "stable",
- "resources" : {
- "bugtracker" : {
- "mailto" : "bug-Internals-CountObjects@rt.cpan.org",
- "web" : "http://rt.cpan.org/NoAuth/Bugs.html?Dist=Internals-CountObjects"
- },
- "homepage" : "http://search.cpan.org/dist/Internals-CountObjects",
- "repository" : {
- "type" : "git",
- "url" : "git://github.com/jbenjore/Internals-CountObjects.git",
- "web" : "http://github.com/jbenjore/Internals-CountObjects"
- }
- },
- "version" : "0.05"
-}
-''' > ${TMP}/Internals-CountObjects-0.05/META.json
-
-cd $TMP
-
-# Tar compress
-tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.01.tar.gz Internals-CountObjects-0.01
-tar -czf authors_id_J_JJ_JJORE_Internals-CountObjects-0.05.tar.gz Internals-CountObjects-0.05
-
-# Move .tar.gz archives to a servable directory
-mv *.tar.gz ../../$BASE_PATH
-
-# Clean up removing tmp_dir
-cd ../../
-rm -r tmp_dir/
diff --git a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects b/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
deleted file mode 100644
--- a/swh/loader/package/cpan/tests/data/https_fastapi.metacpan.org/v1_release_versions_Internals-CountObjects
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "took" : 3,
- "releases" : [
- {
- "maturity" : "released",
- "authorized" : true,
- "date" : "2011-06-11T05:23:31",
- "name" : "Internals-CountObjects-0.05",
- "version" : "0.05",
- "author" : "JJORE",
- "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
- "status" : "latest"
- },
- {
- "authorized" : true,
- "date" : "2011-06-05T18:44:02",
- "maturity" : "released",
- "name" : "Internals-CountObjects-0.01",
- "version" : "0.01",
- "author" : "JJORE",
- "download_url" : "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
- "status" : "cpan"
- }
- ],
- "total" : 4
-}
diff --git a/swh/loader/package/cpan/tests/test_cpan.py b/swh/loader/package/cpan/tests/test_cpan.py
--- a/swh/loader/package/cpan/tests/test_cpan.py
+++ b/swh/loader/package/cpan/tests/test_cpan.py
@@ -3,11 +3,15 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+# flake8: noqa: B950
+
+import pytest
+
+from swh.loader.package import __version__
from swh.loader.package.cpan.loader import CpanLoader
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
- ObjectType,
Person,
Release,
Snapshot,
@@ -15,38 +19,80 @@
TargetType,
TimestampWithTimezone,
)
+from swh.model.model import ObjectType as ModelObjectType
+
+ORIGIN_URL = "https://metacpan.org/dist/Internals-CountObjects"
-ORIGINS = [
- "https://metacpan.org/dist/Internals-CountObjects",
+API_BASE_URL = "https://fastapi.metacpan.org/v1"
+
+ORIGIN_ARTIFACTS = [
+ {
+ "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.05.tar.gz",
+ "filename": "CountObjects-0.05.tar.gz",
+ "version": "0.05",
+ "length": 632,
+ "checksums": {
+ "sha256": "e0ecf6ab4873fa55ff74da22a3c4ae0ab6a1409635c9cd2d6059abbb32be3a6a"
+ },
+ },
+ {
+ "url": "https://cpan.metacpan.org/authors/id/J/JJ/JJORE/Internals-CountObjects-0.01.tar.gz",
+ "filename": "CountObjects-0.01.tar.gz",
+ "version": "0.01",
+ "length": 453,
+ "checksums": {
+ "sha256": "a368004ab98c5860a8fd87e0a4c44e4ee2d1b95d9b13597519a0e644c167468a"
+ },
+ },
+]
+
+ORIGIN_MODULE_METADATA = [
+ {
+ "name": "Internals-CountObjects",
+ "version": "0.05",
+ "author": "Josh Jore <jjore@cpan.org>",
+ "cpan_author": "JJORE",
+ "date": "2011-06-11T05:23:31",
+ "release_name": "Internals-CountObjects-0.05",
+ },
+ {
+ "name": "Internals-CountObjects",
+ "version": "0.01",
+ "author": "Josh Jore <jjore@cpan.org>",
+ "cpan_author": "JJORE",
+ "date": "2011-06-05T18:44:02",
+ "release_name": "Internals-CountObjects-0.01",
+ },
]
-def test_get_versions(requests_mock_datadir, swh_storage):
- loader = CpanLoader(
+@pytest.fixture
+def cpan_loader(requests_mock_datadir, swh_storage):
+ return CpanLoader(
swh_storage,
- url=ORIGINS[0],
+ url=ORIGIN_URL,
+ api_base_url=API_BASE_URL,
+ artifacts=ORIGIN_ARTIFACTS,
+ module_metadata=ORIGIN_MODULE_METADATA,
)
- assert loader.get_versions() == ["0.01", "0.05"]
-def test_get_default_version(requests_mock_datadir, swh_storage):
- loader = CpanLoader(
- swh_storage,
- url=ORIGINS[0],
- )
- assert loader.get_default_version() == "0.05"
+def test_get_versions(cpan_loader):
+ assert cpan_loader.get_versions() == ["0.01", "0.05"]
-def test_cpan_loader_load_multiple_version(datadir, requests_mock_datadir, swh_storage):
- loader = CpanLoader(
- swh_storage,
- url=ORIGINS[0],
- )
- load_status = loader.load()
+def test_get_default_version(cpan_loader):
+ assert cpan_loader.get_default_version() == "0.05"
+
+
+def test_cpan_loader_load_multiple_version(cpan_loader):
+
+ load_status = cpan_loader.load()
assert load_status["status"] == "eventful"
assert load_status["snapshot_id"] is not None
expected_snapshot_id = "848ee8d69d33481c88ab81f6794f6504190f011f"
+ expected_head_release = "07382fd255ec0fc293b92aeb7e68b3fe31c174f9"
assert expected_snapshot_id == load_status["snapshot_id"]
@@ -58,7 +104,7 @@
target_type=TargetType.RELEASE,
),
b"releases/0.05": SnapshotBranch(
- target=hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9"),
+ target=hash_to_bytes(expected_head_release),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -68,9 +114,11 @@
},
)
- check_snapshot(expected_snapshot, swh_storage)
+ storage = cpan_loader.storage
+
+ check_snapshot(expected_snapshot, storage)
- stats = get_stats(swh_storage)
+ stats = get_stats(storage)
assert {
"content": 2,
"directory": 4,
@@ -82,14 +130,13 @@
"snapshot": 1,
} == stats
- assert swh_storage.release_get(
- [hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9")]
- )[0] == Release(
+ head_release = storage.release_get([hash_to_bytes(expected_head_release)])[0]
+
+ assert head_release == Release(
name=b"0.05",
- message=b"Synthetic release for Perl source package Internals-CountObjects"
- b" version 0.05\n",
+ message=b"Synthetic release for Perl source package Internals-CountObjects version 0.05\n",
target=hash_to_bytes("af3f6a43eaf4b26dbcadb1101e8d81db6d6151e0"),
- target_type=ObjectType.DIRECTORY,
+ target_type=ModelObjectType.DIRECTORY,
synthetic=True,
author=Person(
fullname=b"Josh Jore <jjore@cpan.org>",
@@ -97,12 +144,12 @@
email=b"jjore@cpan.org",
),
date=TimestampWithTimezone.from_iso8601("2011-06-11T05:23:31+00:00"),
- id=hash_to_bytes("07382fd255ec0fc293b92aeb7e68b3fe31c174f9"),
+ id=hash_to_bytes(expected_head_release),
)
assert_last_visit_matches(
- swh_storage,
- url=ORIGINS[0],
+ storage,
+ url=ORIGIN_URL,
status="full",
type="cpan",
snapshot=expected_snapshot.id,
diff --git a/swh/loader/package/cpan/tests/test_tasks.py b/swh/loader/package/cpan/tests/test_tasks.py
--- a/swh/loader/package/cpan/tests/test_tasks.py
+++ b/swh/loader/package/cpan/tests/test_tasks.py
@@ -9,6 +9,13 @@
from swh.scheduler.model import ListedOrigin, Lister
+from .test_cpan import (
+ API_BASE_URL,
+ ORIGIN_ARTIFACTS,
+ ORIGIN_MODULE_METADATA,
+ ORIGIN_URL,
+)
+
NAMESPACE = "swh.loader.package.cpan"
@@ -21,8 +28,13 @@
def cpan_listed_origin(cpan_lister):
return ListedOrigin(
lister_id=cpan_lister.id,
- url="https://metacpan.org/dist/Software-Packager",
+ url=ORIGIN_URL,
visit_type="cpan",
+ extra_loader_arguments={
+ "api_base_url": API_BASE_URL,
+ "artifacts": ORIGIN_ARTIFACTS,
+ "module_metadata": ORIGIN_MODULE_METADATA,
+ },
)

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 10:53 AM (20 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226384

Event Timeline