diff --git a/swh/lister/cpan/lister.py b/swh/lister/cpan/lister.py --- a/swh/lister/cpan/lister.py +++ b/swh/lister/cpan/lister.py @@ -50,7 +50,9 @@ "version", ] OPTIONAL_DOC_FIELDS = ["date", "author", "stat.size", "name", "metadata.author"] - ORIGIN_URL_PATTERN = "https://metacpan.org/dist/{module_name}" + # CPAN hosts legacy modules known as backpan that do not have an HTML landing page + # so use fake origin URL pattern below instead + ORIGIN_URL_PATTERN = "cpan://{author}/{module_name}" EXTRINSIC_METADATA_URL_PATTERN = BASE_URL + "/release/{author}/{release_name}" def __init__( @@ -175,10 +177,13 @@ for module_name in module_names: module_metadata = self.module_metadata[module_name] + author = module_metadata[0]["cpan_author"] yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, - url=self.ORIGIN_URL_PATTERN.format(module_name=module_name), + url=self.ORIGIN_URL_PATTERN.format( + author=author, module_name=module_name + ), last_update=max(self.release_dates[module_name]), extra_loader_arguments={ "artifacts": self.artifacts[module_name], diff --git a/swh/lister/cpan/tests/test_lister.py b/swh/lister/cpan/tests/test_lister.py --- a/swh/lister/cpan/tests/test_lister.py +++ b/swh/lister/cpan/tests/test_lister.py @@ -129,7 +129,7 @@ author = release["_source"]["author"] author_fullname = release["_source"]["metadata"]["author"][0] date = release["_source"]["date"] - origin_url = f"https://metacpan.org/dist/{distribution}" + origin_url = f"cpan://{author}/{distribution}" version = get_module_version(distribution, version, release_name)