Page Menu | Home | Software Heritage

D8649.id31232.diff
No One | Temporary

D8649.id31232.diff

diff --git a/swh/lister/cpan/lister.py b/swh/lister/cpan/lister.py
--- a/swh/lister/cpan/lister.py
+++ b/swh/lister/cpan/lister.py
@@ -50,7 +50,9 @@
"version",
]
OPTIONAL_DOC_FIELDS = ["date", "author", "stat.size", "name", "metadata.author"]
- ORIGIN_URL_PATTERN = "https://metacpan.org/dist/{module_name}"
+ # CPAN hosts legacy modules (known as BackPAN) that do not have an HTML landing
+ # page, so use the fake origin URL pattern below instead.
+ ORIGIN_URL_PATTERN = "cpan://{author}/{module_name}"
EXTRINSIC_METADATA_URL_PATTERN = BASE_URL + "/release/{author}/{release_name}"
def __init__(
@@ -175,10 +177,13 @@
for module_name in module_names:
module_metadata = self.module_metadata[module_name]
+ author = module_metadata[0]["cpan_author"]
yield ListedOrigin(
lister_id=self.lister_obj.id,
visit_type=self.VISIT_TYPE,
- url=self.ORIGIN_URL_PATTERN.format(module_name=module_name),
+ url=self.ORIGIN_URL_PATTERN.format(
+ author=author, module_name=module_name
+ ),
last_update=max(self.release_dates[module_name]),
extra_loader_arguments={
"artifacts": self.artifacts[module_name],
diff --git a/swh/lister/cpan/tests/test_lister.py b/swh/lister/cpan/tests/test_lister.py
--- a/swh/lister/cpan/tests/test_lister.py
+++ b/swh/lister/cpan/tests/test_lister.py
@@ -129,7 +129,7 @@
author = release["_source"]["author"]
author_fullname = release["_source"]["metadata"]["author"][0]
date = release["_source"]["date"]
- origin_url = f"https://metacpan.org/dist/{distribution}"
+ origin_url = f"cpan://{author}/{distribution}"
version = get_module_version(distribution, version, release_name)

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 9:31 AM (15 h, 19 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215720

Event Timeline