Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123464
D8615.id31113.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
31 KB
Subscribers
None
D8615.id31113.diff
View Options
diff --git a/swh/lister/cpan/lister.py b/swh/lister/cpan/lister.py
--- a/swh/lister/cpan/lister.py
+++ b/swh/lister/cpan/lister.py
@@ -3,9 +3,13 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from collections import defaultdict
+from datetime import datetime
import logging
from typing import Any, Dict, Iterator, List, Optional
+import iso8601
+
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@@ -26,6 +30,12 @@
INSTANCE = "cpan"
BASE_URL = "https://fastapi.metacpan.org/v1/"
+ REQUIRED_DOC_FIELDS = [
+ "download_url",
+ "checksum_sha256",
+ "distribution",
+ "version",
+ ]
def __init__(
self,
@@ -39,17 +49,22 @@
url=self.BASE_URL,
)
+ self.artifacts: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
+ self.artifacts_metadata: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
+ self.release_dates: Dict[str, List[datetime]] = defaultdict(list)
+
def get_pages(self) -> Iterator[CpanListerPage]:
"""Yield an iterator which returns 'page'"""
- endpoint = f"{self.BASE_URL}distribution/_search"
+ endpoint = f"{self.BASE_URL}release/_search"
scrollendpoint = f"{self.BASE_URL}_search/scroll"
- size: int = 1000
+ size = 1000
res = self.http_request(
endpoint,
params={
- "fields": ["name"],
+ "_source": self.REQUIRED_DOC_FIELDS
+ + ["date", "metadata.author", "stat.size"],
"size": size,
"scroll": "1m",
},
@@ -67,25 +82,72 @@
_scroll_id = scroll_res.json()["_scroll_id"]
yield data
+ def get_field_value(self, entry, field_name):
+ fields = field_name.split(".")
+ field_value = entry["_source"]
+ for i, field in enumerate(fields):
+ field_value = field_value.get(field, {} if i < len(fields) - 1 else None)
+ # scrolled results might have field value in a list
+ if type(field_value) == list:
+ field_value = field_value[0]
+ return field_value
+
def get_origins_from_page(self, page: CpanListerPage) -> Iterator[ListedOrigin]:
"""Iterate on all pages and yield ListedOrigin instances."""
assert self.lister_obj.id is not None
+ package_names = set()
for entry in page:
- # Skip the entry if 'fields' or 'name' keys are missing
- if "fields" not in entry or "name" not in entry["fields"]:
+
+ if "_source" not in entry or not all(
+ k in entry["_source"].keys() for k in self.REQUIRED_DOC_FIELDS
+ ):
+ logger.warning(
+ "Skipping release entry %s as some required fields are missing",
+ entry.get("_source"),
+ )
continue
- pkgname = entry["fields"]["name"]
- # TODO: Check why sometimes its a one value list
- if type(pkgname) != str:
- pkgname = pkgname[0]
+ package_name = self.get_field_value(entry, "distribution")
+ package_version = self.get_field_value(entry, "version")
+ package_download_url = self.get_field_value(entry, "download_url")
+ package_sha256_checksum = self.get_field_value(entry, "checksum_sha256")
+ package_date = self.get_field_value(entry, "date")
+ package_size = self.get_field_value(entry, "stat.size")
+ package_author = self.get_field_value(entry, "metadata.author")
+
+ self.artifacts[package_name].append(
+ {
+ "name": package_name,
+ "url": package_download_url,
+ "checksums": {"sha256": package_sha256_checksum},
+ "version": package_version,
+ "length": package_size,
+ }
+ )
+
+ self.artifacts_metadata[package_name].append(
+ {
+ "version": package_version,
+ "author": package_author,
+ "date": package_date,
+ }
+ )
+
+ self.release_dates[package_name].append(iso8601.parse_date(package_date))
+
+ package_names.add(package_name)
- url = f"https://metacpan.org/dist/{pkgname}"
+ for package_name in package_names:
+ origin_url = f"https://metacpan.org/dist/{package_name}"
yield ListedOrigin(
lister_id=self.lister_obj.id,
visit_type=self.VISIT_TYPE,
- url=url,
- last_update=None,
+ url=origin_url,
+ last_update=max(self.release_dates[package_name]),
+ extra_loader_arguments={
+ "artifacts": self.artifacts[package_name],
+ "artifacts_metadata": self.artifacts_metadata[package_name],
+ },
)
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll
new file mode 100644
--- /dev/null
+++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll
@@ -0,0 +1,227 @@
+{
+ "_shards": {
+ "failed": 0,
+ "total": 3,
+ "successful": 3
+ },
+ "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==",
+ "terminated_early": true,
+ "took": 3,
+ "hits": {
+ "max_score": 1.0,
+ "hits": [
+ {
+ "_score": 1.0,
+ "_source": {
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1210.tar.gz",
+ "version": "0.1210",
+ "distribution": "Validator-Custom",
+ "date": "2010-08-14T01:41:56",
+ "stat": {
+ "size": 17608
+ },
+ "checksum_sha256": "f7240f7793ced2952701f0ed28ecf43c07cc2fa4549cc505831eceb8424cba4a",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ }
+ },
+ "_type": "release",
+ "_index": "cpan_v1_01",
+ "_id": "VGApYqMT4TCxUzHcITn8ZhGHlxE"
+ },
+ {
+ "_type": "release",
+ "_id": "ilQN4bpIIdRl6DoiB3y47fgNIk8",
+ "_index": "cpan_v1_01",
+ "_score": 1.0,
+ "_source": {
+ "date": "2010-07-28T23:00:52",
+ "distribution": "Validator-Custom",
+ "version": "0.1208",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1208.tar.gz",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "checksum_sha256": "e33a860b026cad852eb919da4a3645007b47e5f414eb7272534b10cee279b52b",
+ "stat": {
+ "size": 17489
+ }
+ }
+ },
+ {
+ "_source": {
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1619.tar.gz",
+ "version": "0.1619",
+ "distribution": "DBIx-Custom",
+ "date": "2010-10-20T15:01:35",
+ "stat": {
+ "size": 27195
+ },
+ "checksum_sha256": "83c295343f48ebc03029139082345c93527ffe5831820f99e4a72ee67ef186a5",
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ }
+ },
+ "_score": 1.0,
+ "_id": "g7562_4h9d693lxvc_cgEOTJAZk",
+ "_index": "cpan_v1_01",
+ "_type": "release"
+ },
+ {
+ "_score": 1.0,
+ "_source": {
+ "version": "0.1401",
+ "distribution": "DBIx-Custom",
+ "date": "2010-05-01T23:29:22",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1401.tar.gz",
+ "checksum_sha256": "004be1d48b6819941b3cb3c53bf457799d811348e0bb15e7cf18211505637aba",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "stat": {
+ "size": 22711
+ }
+ },
+ "_type": "release",
+ "_id": "bLRsOH2sevNQ6Q93exgkvCZONo0",
+ "_index": "cpan_v1_01"
+ },
+ {
+ "_type": "release",
+ "_index": "cpan_v1_01",
+ "_id": "D8L3qWKznn0IQZrZEeDi9uyXbJY",
+ "_score": 1.0,
+ "_source": {
+ "stat": {
+ "size": 22655
+ },
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "checksum_sha256": "6b39e3ad2bc98f06af3a75c96cd8c056a25f7501ed216a375472c8fe7bbb72be",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1301.tar.gz",
+ "distribution": "DBIx-Custom",
+ "version": "0.1301",
+ "date": "2010-05-01T13:02:19"
+ }
+ },
+ {
+ "_score": 1.0,
+ "_source": {
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "checksum_sha256": "7a7e18514e171a6c55ef4c8aef92bd548b15ffd7dec4c1fdc83c276a032f6b8a",
+ "stat": {
+ "size": 18999
+ },
+ "date": "2010-06-25T12:11:33",
+ "distribution": "DBIx-Custom",
+ "version": "0.1602",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1602.tar.gz"
+ },
+ "_type": "release",
+ "_index": "cpan_v1_01",
+ "_id": "kmzgsMLGdsuiHjrSW55lLwMRO4o"
+ },
+ {
+ "_source": {
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1204.tar.gz",
+ "version": "0.1204",
+ "distribution": "Validator-Custom",
+ "date": "2010-07-08T13:14:23",
+ "stat": {
+ "size": 13256
+ },
+ "checksum_sha256": "40800b3d92cebc09967b61725cecdd05de2b04649f095e3034c5dd82f3d4ad89",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ }
+ },
+ "_score": 1.0,
+ "_index": "cpan_v1_01",
+ "_id": "M_lLALu56mb_cDK_jAXwUB2PUlw",
+ "_type": "release"
+ },
+ {
+ "_id": "EVuvfiFcvtEr9Ne5Q4QoMAaxe7E",
+ "_index": "cpan_v1_01",
+ "_type": "release",
+ "_source": {
+ "stat": {
+ "size": 12572
+ },
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "checksum_sha256": "028a0b41c152c585143167464bed2ac6b6680c8006aa80867f9a8faa4ca5efe7",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1203.tar.gz",
+ "date": "2010-07-07T13:29:41",
+ "distribution": "Validator-Custom",
+ "version": "0.1203"
+ },
+ "_score": 1.0
+ },
+ {
+ "_index": "cpan_v1_01",
+ "_id": "ZaT8bwXejVTHmrzZCqNJPRFImBY",
+ "_type": "release",
+ "_source": {
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "checksum_sha256": "940412af9b7faf4c946a5e4d57ca52e5b704e49c4d7d0aa5ecb6d2286477ebc6",
+ "stat": {
+ "size": 40480
+ },
+ "distribution": "DBIx-Custom",
+ "version": "0.1641",
+ "date": "2011-01-27T05:19:14",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1641.tar.gz"
+ },
+ "_score": 1.0
+ },
+ {
+ "_source": {
+ "version": "0.1646",
+ "distribution": "DBIx-Custom",
+ "date": "2011-02-18T17:48:52",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-0.1646.tar.gz",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "checksum_sha256": "7f729311e3e22d36b158e62b42ab2fbd29f08eabd57206e235db939d1ae57d24",
+ "stat": {
+ "size": 46577
+ }
+ },
+ "_score": 1.0,
+ "_index": "cpan_v1_01",
+ "_id": "j21QIzHRYZKz1vobyGAPa2BuO50",
+ "_type": "release"
+ }
+ ],
+ "total": 359941
+ },
+ "timed_out": false
+}
\ No newline at end of file
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll,scroll=1m,scroll_id=cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw== b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll,scroll=1m,scroll_id=cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==
deleted file mode 100644
--- a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll,scroll=1m,scroll_id=cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==
+++ /dev/null
@@ -1,50 +0,0 @@
-{
- "_shards" : {
- "successful" : 3,
- "total" : 3,
- "failed" : 0
- },
- "timed_out" : false,
- "hits" : {
- "max_score" : 1.0,
- "hits" : [
- {
- "_type" : "distribution",
- "fields" : {
- "name" : [
- "EventSource-Server"
- ]
- },
- "_id" : "EventSource-Server",
- "_index" : "cpan_v1_01",
- "_score" : 1.0
- },
- {
- "_score" : 1.0,
- "_index" : "cpan_v1_01",
- "_id" : "Interchange6",
- "fields" : {
- "name" : [
- "Interchange6"
- ]
- },
- "_type" : "distribution"
- },
- {
- "_score" : 1.0,
- "_index" : "cpan_v1_01",
- "_id" : "Internals-CountObjects",
- "fields" : {
- "name" : [
- "Internals-CountObjects"
- ]
- },
- "_type" : "distribution"
- }
- ],
- "total" : 43675
- },
- "took" : 72,
- "_scroll_id" : "cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==",
- "terminated_early" : true
-}
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll,scroll=1m,scroll_id=cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==_visit1 b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll,scroll=1m,scroll_id=cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==_visit1
deleted file mode 100644
--- a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1__search_scroll,scroll=1m,scroll_id=cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==_visit1
+++ /dev/null
@@ -1,16 +0,0 @@
-{
- "_scroll_id" : "cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==",
- "took" : 1,
- "hits" : {
- "hits" : [],
- "total" : 43675,
- "max_score" : 1.0
- },
- "terminated_early" : true,
- "timed_out" : false,
- "_shards" : {
- "failed" : 0,
- "total" : 3,
- "successful" : 3
- }
-}
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_distribution__search,fields=name,size=1000,scroll=1m b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_distribution__search,fields=name,size=1000,scroll=1m
deleted file mode 100644
--- a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_distribution__search,fields=name,size=1000,scroll=1m
+++ /dev/null
@@ -1,52 +0,0 @@
-{
- "_shards" : {
- "successful" : 3,
- "failed" : 0,
- "total" : 3
- },
- "_scroll_id" : "cXVlcnlUaGVuRmV0Y2g7Mzs5NTU1MTQ1NTk6eXptdmszQUNUam1XbVJjRjRkRk9Udzs5NTQ5NjQ5NjI6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5NTU1MTQ1NjA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw==",
- "took" : 61,
- "hits" : {
- "max_score" : 1.0,
- "hits" : [
- {
- "_score" : 1.0,
- "_index" : "cpan_v1_01",
- "_id" : "openerserver_perl-master",
- "fields" : {
- "name" : "openerserver_perl-master"
- },
- "_type" : "distribution"
- },
- {
- "_score" : 1.0,
- "_type" : "distribution",
- "fields" : {
- "name" : "Getopt_Auto"
- },
- "_id" : "Getopt_Auto",
- "_index" : "cpan_v1_01"
- },
- {
- "_id" : "App-Booklist",
- "_index" : "cpan_v1_01",
- "_type" : "distribution",
- "fields" : {
- "name" : "App-Booklist"
- },
- "_score" : 1.0
- },
- {
- "fields" : {
- "name" : "EuclideanRhythm"
- },
- "_type" : "distribution",
- "_index" : "cpan_v1_01",
- "_id" : "EuclideanRhythm",
- "_score" : 1.0
- }
- ],
- "total" : 43675
- },
- "timed_out" : false
-}
diff --git a/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_release__search b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_release__search
new file mode 100644
--- /dev/null
+++ b/swh/lister/cpan/tests/data/https_fastapi.metacpan.org/v1_release__search
@@ -0,0 +1,226 @@
+{
+ "timed_out": false,
+ "_shards": {
+ "total": 3,
+ "failed": 0,
+ "successful": 3
+ },
+ "hits": {
+ "hits": [
+ {
+ "_index": "cpan_v1_01",
+ "_id": "40MmOvf_SQx_mr8Kj9Eush14a3E",
+ "_source": {
+ "date": "2011-03-02T00:46:14",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KR/KRYDE/math-image-46.tar.gz",
+ "checksum_sha256": "6bd988e3959feb1071d3b9953d16e723af66bdb7b5440ea17add8709d95f20fa",
+ "version": "46",
+ "stat": {
+ "size": 533502
+ },
+ "distribution": "math-image",
+ "metadata": {
+ "author": [
+ "Kevin Ryde <user42@zip.com.au>"
+ ]
+ }
+ },
+ "_type": "release",
+ "_score": 1.0
+ },
+ {
+ "_index": "cpan_v1_01",
+ "_source": {
+ "distribution": "Dist-Zilla-Plugin-ProgCriticTests",
+ "metadata": {
+ "author": [
+ "Christian Walde <mithaldu@yahoo.de>"
+ ]
+ },
+ "stat": {
+ "size": 16918
+ },
+ "checksum_sha256": "ef8c92d0fc55551392a6daeee20a1c13a3ee1bcd0fcacf611cbc2a6cc503f401",
+ "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHALDU/Dist-Zilla-Plugin-ProgCriticTests-1.101580-TRIAL.tar.gz",
+ "date": "2010-06-07T14:43:36",
+ "version": "1.101580"
+ },
+ "_id": "6df77_MLO_BG8YC_vQKsay7OFYM",
+ "_type": "release",
+ "_score": 1.0
+ },
+ {
+ "_index": "cpan_v1_01",
+ "_type": "release",
+ "_source": {
+ "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHUN/Net-Rapidshare-v0.04.tar.gz",
+ "date": "2009-07-28T05:57:26",
+ "checksum_sha256": "f01456a8f8c2b6806a8dd041cf848f330884573d363b28c8b3ff12e837fa8f4f",
+ "version": "v0.04",
+ "distribution": "Net-Rapidshare",
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ },
+ "stat": {
+ "size": 15068
+ }
+ },
+ "_id": "jCs3ZLWuoetrkMLOFKV3YTSr_fM",
+ "_score": 1.0
+ },
+ {
+ "_index": "cpan_v1_01",
+ "_source": {
+ "version": "v0.05",
+ "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHUN/Net-Rapidshare-v0.05.tgz",
+ "date": "2009-12-21T00:29:48",
+ "checksum_sha256": "e1128d3b35616530d9722d0fe3f5f0e343fd914bc8f9c0df55c1a9ad6c7402fe",
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ },
+ "distribution": "Net-Rapidshare",
+ "stat": {
+ "size": 15971
+ }
+ },
+ "_id": "pExMIwabhz_0S1rX7xAY_lq0GTY",
+ "_type": "release",
+ "_score": 1.0
+ },
+ {
+ "_type": "release",
+ "_source": {
+ "version": "v0.0.1",
+ "download_url": "https://cpan.metacpan.org/authors/id/M/MI/MITHUN/Net-Rapidshare-v0.0.1.tar.gz",
+ "checksum_sha256": "990de0a72753fa182e7a5867e55fd6755375b71280bb7e5b3a5f07c4de8af905",
+ "date": "2009-07-18T22:56:38",
+ "stat": {
+ "size": 15161
+ },
+ "metadata": {
+ "author": [
+ "unknown"
+ ]
+ },
+ "distribution": "Net-Rapidshare"
+ },
+ "_id": "eqkhDnj0efXHisWRrMZZ1EHFgug",
+ "_index": "cpan_v1_01",
+ "_score": 1.0
+ },
+ {
+ "_score": 1.0,
+ "_index": "cpan_v1_01",
+ "_type": "release",
+ "_source": {
+ "stat": {
+ "size": 3409
+ },
+ "distribution": "DBIx-Custom-Basic",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-Basic-0.0101.tar.gz",
+ "date": "2009-11-08T04:18:30",
+ "checksum_sha256": "86f68b2d0789934aa6b0202345e9807c5b650f8030b55d0d669ef25293fa3f1f",
+ "version": "0.0101"
+ },
+ "_id": "oKf3t0pXHXa6mZ_4sUZSaSMKuXg"
+ },
+ {
+ "_score": 1.0,
+ "_index": "cpan_v1_01",
+ "_source": {
+ "version": "0.0101",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/DBIx-Custom-SQLite-0.0101.tar.gz",
+ "date": "2009-11-08T04:20:31",
+ "checksum_sha256": "0af123551dff95f9654f4fbc24e945c5d6481b92e67b8e03ca91ef4c83088cc7",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "distribution": "DBIx-Custom-SQLite",
+ "stat": {
+ "size": 3927
+ }
+ },
+ "_type": "release",
+ "_id": "zpVA3zMoUhx0mj8Cn4YC9CuFyA8"
+ },
+ {
+ "_index": "cpan_v1_01",
+ "_source": {
+ "checksum_sha256": "0911fe6ae65f9173c6eb68b6116600552b088939b94881be3c7275344b1cbdce",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-Ext-Mojolicious-0.0103.tar.gz",
+ "date": "2010-01-16T14:51:11",
+ "version": "0.0103",
+ "stat": {
+ "size": 4190
+ },
+ "distribution": "Validator-Custom-Ext-Mojolicious",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ }
+ },
+ "_id": "mY_jP2O7NnTtr3utv_xZQNu10Ic",
+ "_type": "release",
+ "_score": 1.0
+ },
+ {
+ "_source": {
+ "stat": {
+ "size": 4257
+ },
+ "distribution": "Validator-Custom-Ext-Mojolicious",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ },
+ "date": "2010-01-15T14:07:24",
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-Ext-Mojolicious-0.0102.tar.gz",
+ "checksum_sha256": "a88d01504353223f7a3cb0d6a240debb9c6d6155858f1048a19007c3b366beed",
+ "version": "0.0102"
+ },
+ "_id": "WZm6hQ6mBfOqgVE6dPQOE0L8hg0",
+ "_type": "release",
+ "_index": "cpan_v1_01",
+ "_score": 1.0
+ },
+ {
+ "_index": "cpan_v1_01",
+ "_type": "release",
+ "_source": {
+ "download_url": "https://cpan.metacpan.org/authors/id/K/KI/KIMOTO/Validator-Custom-0.1207.tar.gz",
+ "date": "2010-07-28T13:42:23",
+ "checksum_sha256": "f599da2ecc17ac74443628eb84233ee6b25b204511f83ea778dad9efd0f558e0",
+ "version": "0.1207",
+ "stat": {
+ "size": 16985
+ },
+ "distribution": "Validator-Custom",
+ "metadata": {
+ "author": [
+ "Yuki Kimoto <kimoto.yuki@gmail.com>"
+ ]
+ }
+ },
+ "_id": "NWJOqmjEinjfJqawfpkEpEhu4d0",
+ "_score": 1.0
+ }
+ ],
+ "total": 359941,
+ "max_score": 1.0
+ },
+ "took": 14,
+ "_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw=="
+}
\ No newline at end of file
diff --git a/swh/lister/cpan/tests/test_lister.py b/swh/lister/cpan/tests/test_lister.py
--- a/swh/lister/cpan/tests/test_lister.py
+++ b/swh/lister/cpan/tests/test_lister.py
@@ -3,25 +3,90 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from collections import defaultdict
+from itertools import chain
+import json
+from pathlib import Path
+
+import pytest
+
from swh.lister.cpan.lister import CpanLister
-expected_origins = [
- "https://metacpan.org/dist/App-Booklist",
- "https://metacpan.org/dist/EuclideanRhythm",
- "https://metacpan.org/dist/EventSource-Server",
- "https://metacpan.org/dist/Getopt_Auto",
- "https://metacpan.org/dist/Interchange6",
- "https://metacpan.org/dist/Internals-CountObjects",
- "https://metacpan.org/dist/openerserver_perl-master",
-]
+@pytest.fixture
+def release_search_response(datadir):
+ return json.loads(
+ Path(datadir, "https_fastapi.metacpan.org", "v1_release__search").read_bytes()
+ )
+
+
+@pytest.fixture
+def release_scroll_first_response(datadir):
+ return json.loads(
+ Path(datadir, "https_fastapi.metacpan.org", "v1__search_scroll").read_bytes()
+ )
-def test_cpan_lister(datadir, requests_mock_datadir_visits, swh_scheduler):
+
+@pytest.fixture(autouse=True)
+def mock_network_requests(
+ requests_mock, release_search_response, release_scroll_first_response
+):
+ requests_mock.get(
+ "https://fastapi.metacpan.org/v1/release/_search",
+ json=release_search_response,
+ )
+ requests_mock.get(
+ "https://fastapi.metacpan.org/v1/_search/scroll",
+ [
+ {
+ "json": release_scroll_first_response,
+ },
+ {"json": {"hits": {"hits": []}, "_scroll_id": ""}},
+ ],
+ )
+
+
+def test_cpan_lister(
+ swh_scheduler, release_search_response, release_scroll_first_response
+):
lister = CpanLister(scheduler=swh_scheduler)
res = lister.run()
+ expected_origins = set()
+ expected_artifacts = defaultdict(list)
+ expected_artifacts_metadata = defaultdict(list)
+ for release in chain(
+ release_search_response["hits"]["hits"],
+ release_scroll_first_response["hits"]["hits"],
+ ):
+ distribution = release["_source"]["distribution"]
+ checksum_sha256 = release["_source"]["checksum_sha256"]
+ download_url = release["_source"]["download_url"]
+ version = release["_source"]["version"]
+ size = release["_source"]["stat"]["size"]
+ author = release["_source"]["metadata"]["author"][0]
+ date = release["_source"]["date"]
+ origin_url = f"https://metacpan.org/dist/{distribution}"
+ expected_origins.add(origin_url)
+ expected_artifacts[origin_url].append(
+ {
+ "name": distribution,
+ "url": download_url,
+ "version": version,
+ "length": size,
+ "checksums": {"sha256": checksum_sha256},
+ }
+ )
+ expected_artifacts_metadata[origin_url].append(
+ {
+ "version": version,
+ "author": author,
+ "date": date,
+ }
+ )
+
assert res.pages == 3
- assert res.origins == 4 + 3 + 0
+ assert res.origins == len(expected_origins)
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
@@ -30,3 +95,7 @@
for origin in scheduler_origins:
assert origin.visit_type == "cpan"
assert origin.url in expected_origins
+ assert origin.extra_loader_arguments == {
+ "artifacts": expected_artifacts[origin.url],
+ "artifacts_metadata": expected_artifacts_metadata[origin.url],
+ }
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 9:24 AM (12 h, 31 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230643
Attached To
D8615: cpan: Improve listing process by querying the metacpan release endpoint
Event Timeline
Log In to Comment