Changeset View
Standalone View
swh/loader/package/crates/loader.py
Show First 20 Lines • Show All 207 Lines • ▼ Show 20 Lines | class CratesLoader(PackageLoader[CratesPackageInfo]): | ||||||||||||||||
"""Load Crates package origins into swh archive.""" | """Load Crates package origins into swh archive.""" | ||||||||||||||||
visit_type = "crates" | visit_type = "crates" | ||||||||||||||||
def __init__( | def __init__( | ||||||||||||||||
self, | self, | ||||||||||||||||
storage: StorageInterface, | storage: StorageInterface, | ||||||||||||||||
url: str, | url: str, | ||||||||||||||||
package_name: str, | artifacts: List[Dict[Any, Any]], | ||||||||||||||||
vlorentzUnsubmitted Done Inline Actions
vlorentz: | |||||||||||||||||
version: str, | |||||||||||||||||
checksum: Optional[str] = None, | |||||||||||||||||
max_content_size: Optional[int] = None, | max_content_size: Optional[int] = None, | ||||||||||||||||
): | ): | ||||||||||||||||
"""Constructor | """Constructor | ||||||||||||||||
Args: | Args: | ||||||||||||||||
url : str | url: | ||||||||||||||||
Origin url (e.g. | Origin url, (e.g. https://crates.io/api/v1/crates/<package_name>) | ||||||||||||||||
https://static.crates.io/crates/<name>/<name>-<version>.crate) | |||||||||||||||||
package_name : str | artifacts: | ||||||||||||||||
Crate package name | A list of dict listing all existing released versions for a | ||||||||||||||||
package (Usually set with crates lister `extra_loader_arguments`). | |||||||||||||||||
Each line is a dict that should have an `url` | |||||||||||||||||
(where to download package specific version) and a `version` entry. | |||||||||||||||||
version : str | |||||||||||||||||
Crate package version | |||||||||||||||||
checksum : str, optional | Example:: | ||||||||||||||||
Checksum for the package file to download | |||||||||||||||||
""" | [ | ||||||||||||||||
{ | |||||||||||||||||
"version": <version>, | |||||||||||||||||
"url": "https://static.crates.io/crates/<package_name>/<package_name>-<version>.crate", | |||||||||||||||||
} | |||||||||||||||||
] | |||||||||||||||||
""" # noqa | |||||||||||||||||
super().__init__(storage=storage, url=url, max_content_size=max_content_size) | super().__init__(storage=storage, url=url, max_content_size=max_content_size) | ||||||||||||||||
self.name = package_name | self.url = url | ||||||||||||||||
Done Inline Actions
You could make that a dict keyed by artifact version, with the artifact as value. ^ That'd simplify some stuff below. all_versions = sorted(list(self.artifacts.keys())) # artifact[version] for some value of version... ardumont: You could make that a dict keyed by artifact version, with the artifact as value. ^
That'd simplify some stuff… | |||||||||||||||||
Done Inline ActionsSure! franckbret: Sure! | |||||||||||||||||
self.provider_url = f"https://crates.io/api/v1/crates/{self.name}" | self.artifacts = artifacts | ||||||||||||||||
# Check consistency of name, version, url | |||||||||||||||||
filename = urlparse(url).path.split("/")[-1] | |||||||||||||||||
assert f"{self.name}-{version}.crate" == filename | |||||||||||||||||
@cached_method | @cached_method | ||||||||||||||||
def _raw_info(self) -> bytes: | def _raw_info(self) -> bytes: | ||||||||||||||||
"""Get crate metadata (fetched from http api endpoint set as self.provider_url) | """Get crate metadata (fetched from http api endpoint set as self.url) | ||||||||||||||||
Returns: | Returns: | ||||||||||||||||
Content response as bytes. Content response is a json document. | Content response as bytes. Content response is a json document. | ||||||||||||||||
""" | """ | ||||||||||||||||
return api_info(self.provider_url) | return api_info(self.url) | ||||||||||||||||
Not Done Inline Actions
ardumont: | |||||||||||||||||
Not Done Inline Actionsnevermind that. ardumont: nevermind that. | |||||||||||||||||
@cached_method | @cached_method | ||||||||||||||||
def info(self) -> Dict: | def info(self) -> Dict: | ||||||||||||||||
"""Parse http api json response and return the crate metadata information | """Parse http api json response and return the crate metadata information | ||||||||||||||||
as a Dict.""" | as a Dict.""" | ||||||||||||||||
return json.loads(self._raw_info()) | return json.loads(self._raw_info()) | ||||||||||||||||
Not Done Inline Actions
to be consistent with my concern about 'inconsistent behavior' below. ardumont: to be consistent with my concern about 'inconsistent behavior' below. | |||||||||||||||||
Not Done Inline Actionsnvm that too. ardumont: nvm that too. | |||||||||||||||||
def get_versions(self) -> Sequence[str]: | def get_versions(self) -> Sequence[str]: | ||||||||||||||||
Not Done Inline Actions
wondering whether that could work (although it's only used twice, once in the package loader and once here ;) ardumont: wondering whether that could work (although it's only used twice, once in the package loader and… | |||||||||||||||||
"""Get all released versions of a crate | """Get all released versions of a crate | ||||||||||||||||
Returns: | Returns: | ||||||||||||||||
A sequence of versions | A sequence of versions | ||||||||||||||||
Example:: | Example:: | ||||||||||||||||
["0.1.1", "0.10.2"] | ["0.1.1", "0.10.2"] | ||||||||||||||||
""" | """ | ||||||||||||||||
versions = [version["num"] for version in self.info()["versions"]] | versions = [item["version"] for item in self.artifacts] | ||||||||||||||||
Done Inline Actions
with my previous suggestion. ardumont: with my previous suggestion. | |||||||||||||||||
versions.sort(key=StrictVersion) | versions.sort(key=StrictVersion) | ||||||||||||||||
return versions | return versions | ||||||||||||||||
def get_default_version(self) -> str: | def get_default_version(self) -> str: | ||||||||||||||||
"""Get the newest release version of a crate | """Get the newest release version of a crate | ||||||||||||||||
Returns: | Returns: | ||||||||||||||||
A string representing a version | A string representing a version | ||||||||||||||||
Example:: | Example:: | ||||||||||||||||
"0.1.2" | "0.1.2" | ||||||||||||||||
""" | """ | ||||||||||||||||
return self.info()["crate"]["newest_version"] | return self.get_versions()[-1] | ||||||||||||||||
def get_package_info(self, version: str) -> Iterator[Tuple[str, CratesPackageInfo]]: | def get_package_info(self, version: str) -> Iterator[Tuple[str, CratesPackageInfo]]: | ||||||||||||||||
"""Get release name and package information from version | """Get release name and package information from version | ||||||||||||||||
Args: | Args: | ||||||||||||||||
version: crate version (e.g: "0.1.0") | version: crate version (e.g: "0.1.0") | ||||||||||||||||
Returns: | Returns: | ||||||||||||||||
Iterator of tuple (release_name, p_info) | Iterator of tuple (release_name, p_info) | ||||||||||||||||
""" | """ | ||||||||||||||||
filename = f"{self.name}-{version}.crate" | (artifact,) = [ | ||||||||||||||||
url = f"https://static.crates.io/crates/{self.name}/{self.name}-{version}.crate" | artifact for artifact in self.artifacts if artifact["version"] == version | ||||||||||||||||
] | |||||||||||||||||
filename = artifact["filename"] | |||||||||||||||||
package_name = urlparse(self.url).path.split("/")[-1] | |||||||||||||||||
url = artifact["url"] | |||||||||||||||||
Done Inline Actions
with my previous suggestion. ardumont: with my previous suggestion. | |||||||||||||||||
# Get extrinsic metadata from http api | # Get extrinsic metadata from http api | ||||||||||||||||
# Raw crate info | # Raw crate info | ||||||||||||||||
e_metadata = ExtrinsicPackageMetadata(**self.info()) # type: ignore[misc] | e_metadata = ExtrinsicPackageMetadata(**self.info()) # type: ignore[misc] | ||||||||||||||||
Not Done Inline ActionsThere is some inconsistency here now, I think. The self.info method is using self.url, which is now the origin url (and not the artifact url). That will then avoid the "strange" filtering command for the crate version just one line after my comment. ardumont: There is some inconsistency here now, I think.
This ExtrinsicPackageMetadata should be done for… | |||||||||||||||||
Not Done Inline ActionsNot sure about this. Maybe I'm missing something, so I may be wrong. The api endpoint is for one origin, it returns generic + information for each version. Calling self.info(artifact['url']) will fail because artifact['url'] is the archive to download, not a url suitable for making an api call to get extrinsic metadata. We could certainly call a more specific endpoint by adding the version to the http api url (something like https://crates.io/api/v1/crates/x/0.0.1), but we would then make as many http api calls as there are existing versions for this crate. franckbret: Not sure about this. Maybe I'm missing something, so I may be wrong.
The api endpoint is for one… | |||||||||||||||||
Not Done Inline Actions
ah yeah, thx, that makes sense. ardumont: > The api endpoint is for one origin, it returns generic + information for each version.
>… | |||||||||||||||||
Not Done Inline Actions
with my previous adaptations. ardumont: with my previous adaptations. | |||||||||||||||||
# Extract crate info for current version (One .crate file for a given version) | # Extract crate info for current version (One .crate file for a given version) | ||||||||||||||||
(crate_version,) = [ | (crate_version,) = [ | ||||||||||||||||
crate for crate in e_metadata["versions"] if crate["num"] == version | crate for crate in e_metadata["versions"] if crate["num"] == version | ||||||||||||||||
] | ] | ||||||||||||||||
e_metadata_version = ExtrinsicVersionPackageMetadata( # type: ignore[misc] | e_metadata_version = ExtrinsicVersionPackageMetadata( # type: ignore[misc] | ||||||||||||||||
**crate_version | **crate_version | ||||||||||||||||
) | ) | ||||||||||||||||
p_info = CratesPackageInfo( | p_info = CratesPackageInfo( | ||||||||||||||||
name=self.name, | name=package_name, | ||||||||||||||||
filename=filename, | filename=filename, | ||||||||||||||||
url=url, | url=url, | ||||||||||||||||
version=version, | version=version, | ||||||||||||||||
e_metadata=e_metadata, | e_metadata=e_metadata, | ||||||||||||||||
e_metadata_version=e_metadata_version, | e_metadata_version=e_metadata_version, | ||||||||||||||||
) | ) | ||||||||||||||||
yield release_name(version, filename), p_info | yield release_name(version, filename), p_info | ||||||||||||||||
def build_release( | def build_release( | ||||||||||||||||
self, p_info: CratesPackageInfo, uncompressed_path: str, directory: Sha1Git | self, p_info: CratesPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||||||||||||||
) -> Optional[Release]: | ) -> Optional[Release]: | ||||||||||||||||
# Extract intrinsic metadata from dir_path/Cargo.toml | # Extract intrinsic metadata from dir_path/Cargo.toml | ||||||||||||||||
name = p_info.name | name = p_info.name | ||||||||||||||||
version = p_info.version | version = p_info.version | ||||||||||||||||
dir_path = Path(uncompressed_path, f"{name}-{version}") | dir_path = Path(uncompressed_path, f"{name}-{version}") | ||||||||||||||||
i_metadata_raw = extract_intrinsic_metadata(dir_path) | i_metadata_raw = extract_intrinsic_metadata(dir_path) | ||||||||||||||||
# Get only corresponding key of IntrinsicPackageMetadata | # Get only corresponding key of IntrinsicPackageMetadata | ||||||||||||||||
i_metadata_keys = [k for k in IntrinsicPackageMetadata.__annotations__.keys()] | i_metadata_keys = [k for k in IntrinsicPackageMetadata.__annotations__.keys()] | ||||||||||||||||
# We use data only from "package" entry | # We use data only from "package" entry | ||||||||||||||||
i_metadata = { | i_metadata = { | ||||||||||||||||
k: v for k, v in i_metadata_raw["package"].items() if k in i_metadata_keys | k: v for k, v in i_metadata_raw["package"].items() if k in i_metadata_keys | ||||||||||||||||
} | } | ||||||||||||||||
p_info.i_metadata = IntrinsicPackageMetadata(**i_metadata) # type: ignore[misc] | p_info.i_metadata = IntrinsicPackageMetadata(**i_metadata) # type: ignore[misc] | ||||||||||||||||
author = extract_author(p_info) | author = extract_author(p_info) | ||||||||||||||||
description = extract_description(p_info) | description = extract_description(p_info) | ||||||||||||||||
message = ( | message = ( | ||||||||||||||||
Show All 16 Lines |