Page MenuHomeSoftware Heritage

D8648.id31259.diff
No OneTemporary

D8648.id31259.diff

diff --git a/swh/lister/cpan/lister.py b/swh/lister/cpan/lister.py
--- a/swh/lister/cpan/lister.py
+++ b/swh/lister/cpan/lister.py
@@ -6,7 +6,7 @@
from collections import defaultdict
from datetime import datetime
import logging
-from typing import Any, Dict, Iterator, List, Optional, Set
+from typing import Any, Dict, Iterator, List, Optional, Set, Union
import iso8601
@@ -46,6 +46,18 @@
return field_value
+def get_module_version(
+ module_name: str, module_version: Union[str, float, int], release_name: str
+) -> str:
+ # some old versions fail to be parsed, so the CPAN API sets their version to 0
+ if module_version == 0:
+ prefix = f"{module_name}-"
+ if release_name.startswith(prefix):
+ # extract version from release name
+ module_version = release_name.replace(prefix, "", 1)
+ return str(module_version)
+
+
class CpanLister(StatelessLister[CpanListerPage]):
"""The Cpan lister list origins from 'Cpan', the Comprehensive Perl Archive
Network."""
@@ -103,6 +115,10 @@
module_author_fullname = get_field_value(entry, "metadata.author")
release_name = get_field_value(entry, "name")
+ module_version = get_module_version(
+ module_name, module_version, release_name
+ )
+
self.artifacts[module_name].append(
{
"url": module_download_url,
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page1
rename from swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll
rename to swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page1
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page2 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page2
new file mode 100644
--- /dev/null
+++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page2
@@ -0,0 +1,39 @@
+{
+ "_shards": {
+ "successful": 3,
+ "failed": 0,
+ "total": 3
+ },
+ "hits": {
+ "max_score": 16.105877,
+ "hits": [
+ {
+ "_id": "FM3U2W_LR4pgKJepBaDKUb4WEy0",
+ "_index": "cpan_v1_01",
+ "_type": "release",
+ "_source": {
+ "distribution": "UDPServersAndClients",
+ "date": "2006-04-20T00:03:25",
+ "checksum_sha256": "763da87c32e65cc7ff72d70a503b4e9497f6b506c174b82c97671af8667c1922",
+ "stat": {
+ "size": 5576
+ },
+ "author": "ROBINBANK",
+ "version": 0,
+ "download_url": "https://cpan.metacpan.org/authors/id/R/RO/ROBINBANK/UDPServersAndClients.zip",
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ },
+ "name": "UDPServersAndClients"
+ },
+ "_score": 16.105877
+ }
+ ],
+ "total": 1
+ },
+ "took": 2,
+ "timed_out": false,
+ "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw=="
+}
\ No newline at end of file
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3
new file mode 100644
--- /dev/null
+++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3
@@ -0,0 +1,85 @@
+{
+ "took": 3,
+ "_shards": {
+ "successful": 3,
+ "failed": 0,
+ "total": 3
+ },
+ "timed_out": false,
+ "hits": {
+ "max_score": 13.962857,
+ "hits": [
+ {
+ "_score": 13.962857,
+ "_type": "release",
+ "_source": {
+ "version": 0,
+ "checksum_sha256": "a19fa7e735ea3406dfeb9c72f35fb2b64fda1e8035ce6ba0fabc15ce1c1e2f41",
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ },
+ "author": "MICB",
+ "name": "Compiler-a3",
+ "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a3.tar.gz",
+ "date": "1996-09-02T14:04:00",
+ "stat": {
+ "size": 89134
+ },
+ "distribution": "Compiler"
+ },
+ "_id": "aBI9p6X_yq6r9e8pk7U17pbZMPM",
+ "_index": "cpan_v1_01"
+ },
+ {
+ "_score": 13.707853,
+ "_source": {
+ "checksum_sha256": "def01b544d23c76ec19cc2288a3295b39abcdbdea6dbded5b7fe6d17cd4525de",
+ "version": 0,
+ "name": "Compiler-a2",
+ "author": "MICB",
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ },
+ "date": "1996-08-22T14:30:00",
+ "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a2.tar.gz",
+ "distribution": "Compiler",
+ "stat": {
+ "size": 85123
+ }
+ },
+ "_type": "release",
+ "_id": "fG9UelWPReQei13FQ4EAHytuZCo",
+ "_index": "cpan_v1_01"
+ },
+ {
+ "_source": {
+ "checksum_sha256": "b1f7afd4fa8825adf2c17a0cbd8706484e6d2da5294786a5e6e49c205708ee41",
+ "version": 0,
+ "name": "Compiler-a1",
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ },
+ "author": "MICB",
+ "date": "1996-05-13T11:39:00",
+ "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a1.tar.gz",
+ "stat": {
+ "size": 61093
+ },
+ "distribution": "Compiler"
+ },
+ "_type": "release",
+ "_id": "8H7BRLllDoyILyqsjjV8sqkBpQY",
+ "_index": "cpan_v1_01",
+ "_score": 13.572314
+ }
+ ],
+ "total": 3
+ },
+ "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw=="
+}
\ No newline at end of file
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4
new file mode 100644
--- /dev/null
+++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4
@@ -0,0 +1,131 @@
+{
+ "timed_out": false,
+ "_shards": {
+ "failed": 0,
+ "total": 3,
+ "successful": 3
+ },
+ "took": 14,
+ "hits": {
+ "total": 5,
+ "hits": [
+ {
+ "_score": 14.460719,
+ "_type": "release",
+ "_source": {
+ "stat": {
+ "size": 10738
+ },
+ "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL1.tar.gz",
+ "distribution": "Call-Context",
+ "author": "FELIPE",
+ "version": "0.03-TRIAL1",
+ "checksum_sha256": "82aa854d6ae68342b58361b089c7f480b5b75e94f0c85c1d311f8cace1bfadea",
+ "metadata": {
+ "author": [
+ "Felipe Gasper (FELIPE)"
+ ]
+ },
+ "name": "Call-Context-0.03-TRIAL1",
+ "date": "2018-10-25T03:47:31"
+ },
+ "_index": "cpan_v1_01",
+ "_id": "Cjw1voci7z74uflSPriBTT_A_5c"
+ },
+ {
+ "_id": "VdVDByg5PHxbDh9HnvKAzf8QOws",
+ "_index": "cpan_v1_01",
+ "_source": {
+ "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.01.tar.gz",
+ "stat": {
+ "size": 10019
+ },
+ "author": "FELIPE",
+ "distribution": "Call-Context",
+ "version": 0.01,
+ "date": "2016-11-12T23:12:54",
+ "checksum_sha256": "21bf762ef5b3cbf1047192c2a3c499e9bd315b11e5530bd133856cdf87187b24",
+ "name": "Call-Context-0.01",
+ "metadata": {
+ "author": [
+ "Felipe Gasper (FELIPE)"
+ ]
+ }
+ },
+ "_type": "release",
+ "_score": 14.460719
+ },
+ {
+ "_score": 14.314282,
+ "_id": "_MA6FD8SOhOmTG8JUhvl3CN186I",
+ "_type": "release",
+ "_source": {
+ "stat": {
+ "size": 10046
+ },
+ "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.02.tar.gz",
+ "distribution": "Call-Context",
+ "author": "FELIPE",
+ "version": 0.02,
+ "metadata": {
+ "author": [
+ "Felipe Gasper (FELIPE)"
+ ]
+ },
+ "checksum_sha256": "b80d977f1df0e08bda2808124cd7218ad83f802e1a54aa258e17748ff5c02a0a",
+ "name": "Call-Context-0.02",
+ "date": "2016-11-13T01:07:43"
+ },
+ "_index": "cpan_v1_01"
+ },
+ {
+ "_id": "veMmCu9wirwpTX7czbuQq6SnKQQ",
+ "_type": "release",
+ "_source": {
+ "stat": {
+ "size": 10741
+ },
+ "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL2.tar.gz",
+ "distribution": "Call-Context",
+ "author": "FELIPE",
+ "version": "0.03-TRIAL2",
+ "name": "Call-Context-0.03-TRIAL2",
+ "metadata": {
+ "author": [
+ "Felipe Gasper (FELIPE)"
+ ]
+ },
+ "checksum_sha256": "4ca799d81fc96a774f4f315c38eb3e53616322c332d47f1e3f756814b5bf4b5e",
+ "date": "2018-10-26T13:56:41"
+ },
+ "_index": "cpan_v1_01",
+ "_score": 14.291793
+ },
+ {
+ "_type": "release",
+ "_source": {
+ "version": "0.03",
+ "date": "2018-10-27T00:20:13",
+ "checksum_sha256": "0ee6bf46bc72755adb7a6b08e79d12e207de5f7809707b3c353b58cb2f0b5a26",
+ "metadata": {
+ "author": [
+ "Felipe Gasper (FELIPE)"
+ ]
+ },
+ "name": "Call-Context-0.03",
+ "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03.tar.gz",
+ "stat": {
+ "size": 10730
+ },
+ "author": "FELIPE",
+ "distribution": "Call-Context"
+ },
+ "_index": "cpan_v1_01",
+ "_id": "CAAVfGh_7XpKnzpnLVaBKg8IPMM",
+ "_score": 14.291793
+ }
+ ],
+ "max_score": 14.460719
+ },
+ "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw=="
+}
\ No newline at end of file
diff --git a/swh/lister/cpan/tests/test_lister.py b/swh/lister/cpan/tests/test_lister.py
--- a/swh/lister/cpan/tests/test_lister.py
+++ b/swh/lister/cpan/tests/test_lister.py
@@ -10,7 +10,7 @@
import pytest
-from swh.lister.cpan.lister import CpanLister
+from swh.lister.cpan.lister import CpanLister, get_module_version
@pytest.fixture
@@ -20,16 +20,42 @@
)
-@pytest.fixture
-def release_scroll_first_response(datadir):
+def release_scroll_response(datadir, page):
return json.loads(
- Path(datadir, "https_fastapi.metacpan.org", "v1__search_scroll").read_bytes()
+ Path(
+ datadir, "https_fastapi.metacpan.org", f"v1__search_scroll_page{page}"
+ ).read_bytes()
)
+@pytest.fixture
+def release_scroll_first_response(datadir):
+ return release_scroll_response(datadir, page=1)
+
+
+@pytest.fixture
+def release_scroll_second_response(datadir):
+ return release_scroll_response(datadir, page=2)
+
+
+@pytest.fixture
+def release_scroll_third_response(datadir):
+ return release_scroll_response(datadir, page=3)
+
+
+@pytest.fixture
+def release_scroll_fourth_response(datadir):
+ return release_scroll_response(datadir, page=4)
+
+
@pytest.fixture(autouse=True)
def mock_network_requests(
- requests_mock, release_search_response, release_scroll_first_response
+ requests_mock,
+ release_search_response,
+ release_scroll_first_response,
+ release_scroll_second_response,
+ release_scroll_third_response,
+ release_scroll_fourth_response,
):
requests_mock.get(
"https://fastapi.metacpan.org/v1/release/_search",
@@ -41,13 +67,45 @@
{
"json": release_scroll_first_response,
},
+ {
+ "json": release_scroll_second_response,
+ },
+ {
+ "json": release_scroll_third_response,
+ },
+ {
+ "json": release_scroll_fourth_response,
+ },
{"json": {"hits": {"hits": []}, "_scroll_id": ""}},
],
)
+@pytest.mark.parametrize(
+ "module_name,module_version,release_name,expected_version",
+ [
+ ("Validator-Custom", "0.1207", "Validator-Custom-0.1207", "0.1207"),
+ ("UDPServersAndClients", 0, "UDPServersAndClients", "0"),
+ ("Compiler", 0, "Compiler-a1", "a1"),
+ ("Call-Context", 0.01, "Call-Context-0.01", "0.01"),
+ ],
+)
+def test_get_module_version(
+ module_name, module_version, release_name, expected_version
+):
+ assert (
+ get_module_version(module_name, module_version, release_name)
+ == expected_version
+ )
+
+
def test_cpan_lister(
- swh_scheduler, release_search_response, release_scroll_first_response
+ swh_scheduler,
+ release_search_response,
+ release_scroll_first_response,
+ release_scroll_second_response,
+ release_scroll_third_response,
+ release_scroll_fourth_response,
):
lister = CpanLister(scheduler=swh_scheduler)
res = lister.run()
@@ -58,6 +116,9 @@
for release in chain(
release_search_response["hits"]["hits"],
release_scroll_first_response["hits"]["hits"],
+ release_scroll_second_response["hits"]["hits"],
+ release_scroll_third_response["hits"]["hits"],
+ release_scroll_fourth_response["hits"]["hits"],
):
distribution = release["_source"]["distribution"]
release_name = release["_source"]["name"]
@@ -69,6 +130,9 @@
author_fullname = release["_source"]["metadata"]["author"][0]
date = release["_source"]["date"]
origin_url = f"https://metacpan.org/dist/{distribution}"
+
+ version = get_module_version(distribution, version, release_name)
+
expected_origins.add(origin_url)
expected_artifacts[origin_url].append(
{

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 9:57 AM (19 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224962

Event Timeline