def get_module_version(
    module_name: str, module_version: Union[str, float, int], release_name: str
) -> str:
    """Return the version of a module release as a string.

    Args:
        module_name: name of the module, e.g. ``Compiler``
        module_version: version value as provided by the API; either a
            string or a number
        release_name: name of the release, e.g. ``Compiler-a1``

    Returns:
        The version converted to ``str``. When ``module_version`` equals ``0``
        and ``release_name`` starts with ``"<module_name>-"``, the remainder
        of the release name after that prefix is returned instead.
    """
    # some old versions fail to be parsed and cpan api set version to 0
    if module_version == 0:
        prefix = f"{module_name}-"
        if release_name.startswith(prefix):
            # Extract the version from the release name by dropping only the
            # leading prefix. str.replace() would also strip any later
            # occurrence of "<module_name>-" inside the version part itself.
            module_version = release_name[len(prefix) :]
    # ensure str type
    return str(module_version)
+ "max_score": 16.105877, + "hits": [ + { + "_id": "FM3U2W_LR4pgKJepBaDKUb4WEy0", + "_index": "cpan_v1_01", + "_type": "release", + "_source": { + "distribution": "UDPServersAndClients", + "date": "2006-04-20T00:03:25", + "checksum_sha256": "763da87c32e65cc7ff72d70a503b4e9497f6b506c174b82c97671af8667c1922", + "stat": { + "size": 5576 + }, + "author": "ROBINBANK", + "version": 0, + "download_url": "https://cpan.metacpan.org/authors/id/R/RO/ROBINBANK/UDPServersAndClients.zip", + "metadata": { + "author": [ + "unknown" + ] + }, + "name": "UDPServersAndClients" + }, + "_score": 16.105877 + } + ], + "total": 1 + }, + "took": 2, + "timed_out": false, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==" +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 new file mode 100644 --- /dev/null +++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page3 @@ -0,0 +1,85 @@ +{ + "took": 3, + "_shards": { + "successful": 3, + "failed": 0, + "total": 3 + }, + "timed_out": false, + "hits": { + "max_score": 13.962857, + "hits": [ + { + "_score": 13.962857, + "_type": "release", + "_source": { + "version": 0, + "checksum_sha256": "a19fa7e735ea3406dfeb9c72f35fb2b64fda1e8035ce6ba0fabc15ce1c1e2f41", + "metadata": { + "author": [ + "unknown" + ] + }, + "author": "MICB", + "name": "Compiler-a3", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a3.tar.gz", + "date": "1996-09-02T14:04:00", + "stat": { + "size": 89134 + }, + "distribution": "Compiler" + }, + "_id": "aBI9p6X_yq6r9e8pk7U17pbZMPM", + "_index": "cpan_v1_01" + }, + { + "_score": 13.707853, + "_source": { + "checksum_sha256": "def01b544d23c76ec19cc2288a3295b39abcdbdea6dbded5b7fe6d17cd4525de", + "version": 0, + 
"name": "Compiler-a2", + "author": "MICB", + "metadata": { + "author": [ + "unknown" + ] + }, + "date": "1996-08-22T14:30:00", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a2.tar.gz", + "distribution": "Compiler", + "stat": { + "size": 85123 + } + }, + "_type": "release", + "_id": "fG9UelWPReQei13FQ4EAHytuZCo", + "_index": "cpan_v1_01" + }, + { + "_source": { + "checksum_sha256": "b1f7afd4fa8825adf2c17a0cbd8706484e6d2da5294786a5e6e49c205708ee41", + "version": 0, + "name": "Compiler-a1", + "metadata": { + "author": [ + "unknown" + ] + }, + "author": "MICB", + "date": "1996-05-13T11:39:00", + "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a1.tar.gz", + "stat": { + "size": 61093 + }, + "distribution": "Compiler" + }, + "_type": "release", + "_id": "8H7BRLllDoyILyqsjjV8sqkBpQY", + "_index": "cpan_v1_01", + "_score": 13.572314 + } + ], + "total": 3 + }, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==" +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 new file mode 100644 --- /dev/null +++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll_page4 @@ -0,0 +1,131 @@ +{ + "timed_out": false, + "_shards": { + "failed": 0, + "total": 3, + "successful": 3 + }, + "took": 14, + "hits": { + "total": 5, + "hits": [ + { + "_score": 14.460719, + "_type": "release", + "_source": { + "stat": { + "size": 10738 + }, + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL1.tar.gz", + "distribution": "Call-Context", + "author": "FELIPE", + "version": "0.03-TRIAL1", + "checksum_sha256": "82aa854d6ae68342b58361b089c7f480b5b75e94f0c85c1d311f8cace1bfadea", + "metadata": { + "author": [ + "Felipe Gasper 
(FELIPE)" + ] + }, + "name": "Call-Context-0.03-TRIAL1", + "date": "2018-10-25T03:47:31" + }, + "_index": "cpan_v1_01", + "_id": "Cjw1voci7z74uflSPriBTT_A_5c" + }, + { + "_id": "VdVDByg5PHxbDh9HnvKAzf8QOws", + "_index": "cpan_v1_01", + "_source": { + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.01.tar.gz", + "stat": { + "size": 10019 + }, + "author": "FELIPE", + "distribution": "Call-Context", + "version": 0.01, + "date": "2016-11-12T23:12:54", + "checksum_sha256": "21bf762ef5b3cbf1047192c2a3c499e9bd315b11e5530bd133856cdf87187b24", + "name": "Call-Context-0.01", + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + } + }, + "_type": "release", + "_score": 14.460719 + }, + { + "_score": 14.314282, + "_id": "_MA6FD8SOhOmTG8JUhvl3CN186I", + "_type": "release", + "_source": { + "stat": { + "size": 10046 + }, + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.02.tar.gz", + "distribution": "Call-Context", + "author": "FELIPE", + "version": 0.02, + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + }, + "checksum_sha256": "b80d977f1df0e08bda2808124cd7218ad83f802e1a54aa258e17748ff5c02a0a", + "name": "Call-Context-0.02", + "date": "2016-11-13T01:07:43" + }, + "_index": "cpan_v1_01" + }, + { + "_id": "veMmCu9wirwpTX7czbuQq6SnKQQ", + "_type": "release", + "_source": { + "stat": { + "size": 10741 + }, + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL2.tar.gz", + "distribution": "Call-Context", + "author": "FELIPE", + "version": "0.03-TRIAL2", + "name": "Call-Context-0.03-TRIAL2", + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + }, + "checksum_sha256": "4ca799d81fc96a774f4f315c38eb3e53616322c332d47f1e3f756814b5bf4b5e", + "date": "2018-10-26T13:56:41" + }, + "_index": "cpan_v1_01", + "_score": 14.291793 + }, + { + "_type": "release", + "_source": { + "version": "0.03", + "date": "2018-10-27T00:20:13", + "checksum_sha256": 
"0ee6bf46bc72755adb7a6b08e79d12e207de5f7809707b3c353b58cb2f0b5a26", + "metadata": { + "author": [ + "Felipe Gasper (FELIPE)" + ] + }, + "name": "Call-Context-0.03", + "download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03.tar.gz", + "stat": { + "size": 10730 + }, + "author": "FELIPE", + "distribution": "Call-Context" + }, + "_index": "cpan_v1_01", + "_id": "CAAVfGh_7XpKnzpnLVaBKg8IPMM", + "_score": 14.291793 + } + ], + "max_score": 14.460719 + }, + "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==" +} \ No newline at end of file diff --git a/swh/lister/cpan/tests/test_lister.py b/swh/lister/cpan/tests/test_lister.py --- a/swh/lister/cpan/tests/test_lister.py +++ b/swh/lister/cpan/tests/test_lister.py @@ -10,7 +10,7 @@ import pytest -from swh.lister.cpan.lister import CpanLister +from swh.lister.cpan.lister import CpanLister, get_module_version @pytest.fixture @@ -20,16 +20,42 @@ ) -@pytest.fixture -def release_scroll_first_response(datadir): +def release_scroll_response(datadir, page): return json.loads( - Path(datadir, "https_fastapi.metacpan.org", "v1__search_scroll").read_bytes() + Path( + datadir, "https_fastapi.metacpan.org", f"v1__search_scroll_page{page}" + ).read_bytes() ) +@pytest.fixture +def release_scroll_first_response(datadir): + return release_scroll_response(datadir, page=1) + + +@pytest.fixture +def release_scroll_second_response(datadir): + return release_scroll_response(datadir, page=2) + + +@pytest.fixture +def release_scroll_third_response(datadir): + return release_scroll_response(datadir, page=3) + + +@pytest.fixture +def release_scroll_fourth_response(datadir): + return release_scroll_response(datadir, page=4) + + @pytest.fixture(autouse=True) def mock_network_requests( - requests_mock, release_search_response, release_scroll_first_response + requests_mock, + release_search_response, + 
release_scroll_first_response, + release_scroll_second_response, + release_scroll_third_response, + release_scroll_fourth_response, ): requests_mock.get( "https://fastapi.metacpan.org/v1/release/_search", @@ -41,13 +67,45 @@ { "json": release_scroll_first_response, }, + { + "json": release_scroll_second_response, + }, + { + "json": release_scroll_third_response, + }, + { + "json": release_scroll_fourth_response, + }, {"json": {"hits": {"hits": []}, "_scroll_id": ""}}, ], ) +@pytest.mark.parametrize( + "module_name,module_version,release_name,expected_version", + [ + ("Validator-Custom", "0.1207", "Validator-Custom-0.1207", "0.1207"), + ("UDPServersAndClients", 0, "UDPServersAndClients", "0"), + ("Compiler", 0, "Compiler-a1", "a1"), + ("Call-Context", 0.01, "Call-Context-0.01", "0.01"), + ], +) +def test_get_module_version( + module_name, module_version, release_name, expected_version +): + assert ( + get_module_version(module_name, module_version, release_name) + == expected_version + ) + + def test_cpan_lister( - swh_scheduler, release_search_response, release_scroll_first_response + swh_scheduler, + release_search_response, + release_scroll_first_response, + release_scroll_second_response, + release_scroll_third_response, + release_scroll_fourth_response, ): lister = CpanLister(scheduler=swh_scheduler) res = lister.run() @@ -58,6 +116,9 @@ for release in chain( release_search_response["hits"]["hits"], release_scroll_first_response["hits"]["hits"], + release_scroll_second_response["hits"]["hits"], + release_scroll_third_response["hits"]["hits"], + release_scroll_fourth_response["hits"]["hits"], ): distribution = release["_source"]["distribution"] release_name = release["_source"]["name"] @@ -69,6 +130,9 @@ author_fullname = release["_source"]["metadata"]["author"][0] date = release["_source"]["date"] origin_url = f"https://metacpan.org/dist/{distribution}" + + version = get_module_version(distribution, version, release_name) + expected_origins.add(origin_url) 
expected_artifacts[origin_url].append( {