diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -8,6 +8,7 @@ from pathlib import Path import subprocess from typing import Any, Dict, Iterator, List +from urllib.parse import urlparse import iso8601 @@ -32,7 +33,7 @@ # Part of the lister API, that identifies this lister LISTER_NAME = "crates" # (Optional) CVS type of the origins listed by this lister, if constant - VISIT_TYPE = "rust-crate" + VISIT_TYPE = "crates" INSTANCE = "crates" INDEX_REPOSITORY_URL = "https://github.com/rust-lang/crates.io-index.git" @@ -40,6 +41,7 @@ CRATE_FILE_URL_PATTERN = ( "https://static.crates.io/crates/{crate}/{crate}-{version}.crate" ) + CRATE_API_URL_PATTERN = "https://crates.io/api/v1/crates/{crate}" def __init__( self, @@ -131,15 +133,30 @@ assert self.lister_obj.id is not None + url = self.CRATE_API_URL_PATTERN.format(crate=page[0]["name"]) + last_update = page[0]["last_update"] + artifacts = [] + for version in page: - yield ListedOrigin( - lister_id=self.lister_obj.id, - visit_type=self.VISIT_TYPE, - url=version["crate_file"], - last_update=version["last_update"], - extra_loader_arguments={ - "name": version["name"], - "version": version["version"], - "checksum": version["checksum"], + filename = urlparse(version["crate_file"]).path.split("/")[-1] + # Build an artifact entry following original-artifacts-json specification + # https://docs.softwareheritage.org/devel/swh-storage/extrinsic-metadata-specification.html#original-artifacts-json # noqa: B950 + artifact = { + "filename": f"{filename}", + "checksums": { + "sha256": f"{version['checksum']}", }, - ) + "url": version["crate_file"], + "version": version["version"], + } + artifacts.append(artifact) + + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=self.VISIT_TYPE, + url=url, + last_update=last_update, + extra_loader_arguments={ + "artifacts": artifacts, + }, + ) diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py --- a/swh/lister/crates/tests/test_lister.py +++ b/swh/lister/crates/tests/test_lister.py @@ -10,46 +10,75 @@ expected_origins = [ { - "name": "rand", - "version": "0.1.1", - "checksum": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", - "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate", + "url": "https://crates.io/api/v1/crates/rand", + "artifacts": [ + { + "checksums": { + "sha256": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", # noqa: B950 + }, + "filename": "rand-0.1.1.crate", + "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate", + "version": "0.1.1", + }, + { + "checksums": { + "sha256": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", # noqa: B950 + }, + "filename": "rand-0.1.2.crate", + "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", + "version": "0.1.2", + }, + ], }, { - "name": "rand", - "version": "0.1.2", - "checksum": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", - "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", + "url": "https://crates.io/api/v1/crates/regex", + "artifacts": [ + { + "checksums": { + "sha256": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", # noqa: B950 + }, + "filename": "regex-0.1.0.crate", + "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate", + "version": "0.1.0", + }, + { + "checksums": { + "sha256": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", # noqa: B950 + }, + "filename": "regex-0.1.1.crate", + "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate", + "version": "0.1.1", + }, + { + "checksums": { + "sha256": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", # noqa: B950 + }, + "filename": "regex-0.1.2.crate", + "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate", + "version": "0.1.2", + }, + { + "checksums": { + "sha256": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", # noqa: B950 + }, + "filename": "regex-0.1.3.crate", + "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate", + "version": "0.1.3", + }, + ], }, { - "name": "regex", - "version": "0.1.0", - "checksum": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", - "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate", - }, - { - "name": "regex", - "version": "0.1.1", - "checksum": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", - "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate", - }, - { - "name": "regex", - "version": "0.1.2", - "checksum": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", - "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate", - }, - { - "name": "regex", - "version": "0.1.3", - "checksum": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", - "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate", - }, - { - "name": "regex-syntax", - "version": "0.1.0", - "checksum": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", - "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", + "url": "https://crates.io/api/v1/crates/regex-syntax", + "artifacts": [ + { + "checksums": { + "sha256": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", # noqa: B950 + }, + "filename": "regex-syntax-0.1.0.crate", + "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", + "version": "0.1.0", + }, + ], }, ] @@ -67,7 +96,7 @@ res = lister.run() assert res.pages == 3 - assert res.origins == 7 + assert res.origins == 3 expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url")) scheduler_origins_sorted = sorted( @@ -76,14 +105,10 @@ ) for scheduled, expected in zip(scheduler_origins_sorted, expected_origins_sorted): - assert scheduled.visit_type == "rust-crate" + assert scheduled.visit_type == "crates" assert scheduled.url == expected.get("url") - assert scheduled.extra_loader_arguments.get("name") == expected.get("name") - assert scheduled.extra_loader_arguments.get("version") == expected.get( - "version" - ) - assert scheduled.extra_loader_arguments.get("checksum") == expected.get( - "checksum" + assert scheduled.extra_loader_arguments.get("artifacts") == expected.get( + "artifacts" ) assert len(scheduler_origins_sorted) == len(expected_origins_sorted)