Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066485
D7654.id27889.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D7654.id27889.diff
View Options
diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py
--- a/swh/lister/crates/lister.py
+++ b/swh/lister/crates/lister.py
@@ -8,6 +8,7 @@
from pathlib import Path
import subprocess
from typing import Any, Dict, Iterator, List
+from urllib.parse import urlparse
import iso8601
@@ -32,7 +33,7 @@
# Part of the lister API, that identifies this lister
LISTER_NAME = "crates"
# (Optional) VCS type of the origins listed by this lister, if constant
- VISIT_TYPE = "rust-crate"
+ VISIT_TYPE = "crates"
INSTANCE = "crates"
INDEX_REPOSITORY_URL = "https://github.com/rust-lang/crates.io-index.git"
@@ -40,6 +41,7 @@
CRATE_FILE_URL_PATTERN = (
"https://static.crates.io/crates/{crate}/{crate}-{version}.crate"
)
+ CRATE_API_URL_PATTERN = "https://crates.io/api/v1/crates/{crate}"
def __init__(
self,
@@ -131,15 +133,30 @@
assert self.lister_obj.id is not None
+ url = self.CRATE_API_URL_PATTERN.format(crate=page[0]["name"])
+ last_update = page[0]["last_update"]
+ artifacts = []
+
for version in page:
- yield ListedOrigin(
- lister_id=self.lister_obj.id,
- visit_type=self.VISIT_TYPE,
- url=version["crate_file"],
- last_update=version["last_update"],
- extra_loader_arguments={
- "name": version["name"],
- "version": version["version"],
- "checksum": version["checksum"],
+ filename = urlparse(version["crate_file"]).path.split("/")[-1]
+ # Build an artifact entry following original-artifacts-json specification
+ # https://docs.softwareheritage.org/devel/swh-storage/extrinsic-metadata-specification.html#original-artifacts-json # noqa: B950
+ artifact = {
+ "filename": f"{filename}",
+ "checksums": {
+ "sha256": f"{version['checksum']}",
},
- )
+ "url": version["crate_file"],
+ "version": version["version"],
+ }
+ artifacts.append(artifact)
+
+ yield ListedOrigin(
+ lister_id=self.lister_obj.id,
+ visit_type=self.VISIT_TYPE,
+ url=url,
+ last_update=last_update,
+ extra_loader_arguments={
+ "artifacts": artifacts,
+ },
+ )
diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py
--- a/swh/lister/crates/tests/test_lister.py
+++ b/swh/lister/crates/tests/test_lister.py
@@ -10,46 +10,75 @@
expected_origins = [
{
- "name": "rand",
- "version": "0.1.1",
- "checksum": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d",
- "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate",
+ "url": "https://crates.io/api/v1/crates/rand",
+ "artifacts": [
+ {
+ "checksums": {
+ "sha256": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", # noqa: B950
+ },
+ "filename": "rand-0.1.1.crate",
+ "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate",
+ "version": "0.1.1",
+ },
+ {
+ "checksums": {
+ "sha256": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", # noqa: B950
+ },
+ "filename": "rand-0.1.2.crate",
+ "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate",
+ "version": "0.1.2",
+ },
+ ],
},
{
- "name": "rand",
- "version": "0.1.2",
- "checksum": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7",
- "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate",
+ "url": "https://crates.io/api/v1/crates/regex",
+ "artifacts": [
+ {
+ "checksums": {
+ "sha256": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", # noqa: B950
+ },
+ "filename": "regex-0.1.0.crate",
+ "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate",
+ "version": "0.1.0",
+ },
+ {
+ "checksums": {
+ "sha256": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", # noqa: B950
+ },
+ "filename": "regex-0.1.1.crate",
+ "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate",
+ "version": "0.1.1",
+ },
+ {
+ "checksums": {
+ "sha256": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", # noqa: B950
+ },
+ "filename": "regex-0.1.2.crate",
+ "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate",
+ "version": "0.1.2",
+ },
+ {
+ "checksums": {
+ "sha256": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", # noqa: B950
+ },
+ "filename": "regex-0.1.3.crate",
+ "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate",
+ "version": "0.1.3",
+ },
+ ],
},
{
- "name": "regex",
- "version": "0.1.0",
- "checksum": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5",
- "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate",
- },
- {
- "name": "regex",
- "version": "0.1.1",
- "checksum": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36",
- "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate",
- },
- {
- "name": "regex",
- "version": "0.1.2",
- "checksum": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9",
- "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate",
- },
- {
- "name": "regex",
- "version": "0.1.3",
- "checksum": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3",
- "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate",
- },
- {
- "name": "regex-syntax",
- "version": "0.1.0",
- "checksum": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944",
- "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate",
+ "url": "https://crates.io/api/v1/crates/regex-syntax",
+ "artifacts": [
+ {
+ "checksums": {
+ "sha256": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", # noqa: B950
+ },
+ "filename": "regex-syntax-0.1.0.crate",
+ "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate",
+ "version": "0.1.0",
+ },
+ ],
},
]
@@ -67,7 +96,7 @@
res = lister.run()
assert res.pages == 3
- assert res.origins == 7
+ assert res.origins == 3
expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url"))
scheduler_origins_sorted = sorted(
@@ -76,14 +105,10 @@
)
for scheduled, expected in zip(scheduler_origins_sorted, expected_origins_sorted):
- assert scheduled.visit_type == "rust-crate"
+ assert scheduled.visit_type == "crates"
assert scheduled.url == expected.get("url")
- assert scheduled.extra_loader_arguments.get("name") == expected.get("name")
- assert scheduled.extra_loader_arguments.get("version") == expected.get(
- "version"
- )
- assert scheduled.extra_loader_arguments.get("checksum") == expected.get(
- "checksum"
+ assert scheduled.extra_loader_arguments.get("artifacts") == expected.get(
+ "artifacts"
)
assert len(scheduler_origins_sorted) == len(expected_origins_sorted)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 11:53 AM (18 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218635
Attached To
D7654: crates: create one origin per package instead of per version
Event Timeline
Log In to Comment