Changeset View
Standalone View
swh/loader/package/rpm/loader.py
- This file was added.
| from datetime import datetime | |||||
ardumont: missing copyright header | |||||
Not Done Inline ActionsCopyright (C) 2022 anlambert: Copyright (C) 2022 | |||||
| import logging | |||||
| from os import path, remove | |||||
| import subprocess | |||||
| from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple | |||||
| import attr | |||||
| from swh.core.tarball import uncompress | |||||
| from swh.loader.package.loader import BasePackageInfo, PackageLoader | |||||
| from swh.model.model import ObjectType, Release, Sha1Git | |||||
| from swh.storage.interface import StorageInterface | |||||
| logger = logging.getLogger(__name__) | |||||
@attr.s
class RpmPackageInfo(BasePackageInfo):
    """Package information for a single ``.rpm`` artifact.

    Extends :class:`BasePackageInfo` with the RPM-specific values read from
    the metadata dict handed to :meth:`from_metadata`.
    """

    # TODO(review, anlambert): "this field is not used, you can remove it"
    # (the review note does not say which field is meant -- confirm).
    # TODO(review, anlambert): use
    # string.Template("$name $version $build_time") as manifest format.

    # Metadata dict the instance was built from, stored as received.
    raw_info = attr.ib(type=Dict[str, Any])
    # Package name, e.g. nginx.
    name = attr.ib(type=str)
    # Parsed from the "buildTime" metadata entry; None when not provided.
    build_time = attr.ib(type=datetime, default=None)

    @classmethod
    def from_metadata(
        cls, a_metadata: Dict[str, Any], origin: str, version: str
    ) -> "RpmPackageInfo":
        """Build a package info from one entry of the loader's packages input.

        Args:
            a_metadata: metadata of one package version; the "url",
                "buildTime" and "name" keys are read
            origin: origin url; currently unused
                (TODO(review, anlambert): this parameter can be removed)
            version: version key associated with ``a_metadata``

        Returns:
            The package info describing the ``.rpm`` file located at
            ``a_metadata["url"]``.
        """
        # TODO(review, anlambert): with ``from __future__ import annotations``
        # at the top of the file, the quotes around the return annotation can
        # be dropped.
        artifact_url = a_metadata["url"]  # url of the .rpm file
        # Last path component, e.g. nginx-1.18.0-5.fc34.src.rpm
        rpm_filename = artifact_url.split("/")[-1]
        assert rpm_filename.endswith(".rpm")

        built_at = datetime.fromisoformat(a_metadata["buildTime"])
        package_name = a_metadata["name"]  # nginx

        return cls(
            url=artifact_url,
            filename=rpm_filename,
            version=version,  # 34/Everything/1.18.0
            build_time=built_at,
            # TODO: `a_metadata` should include fedora edition too?
            raw_info=a_metadata,
            name=package_name,
        )
class RpmLoader(PackageLoader[RpmPackageInfo]):
    """Package loader for RPM source packages (visit type ``rpm``)."""

    # NOTE(review, KShivendu): the edition can be one of
    # ["Everything", "Server", "Workstation", "Modular"] (although it is
    # already included in the version).
    visit_type = "rpm"

    def __init__(
        self,
        storage: StorageInterface,
        url: str,
        packages: Mapping[str, Any],
        **kwargs: Any,
    ):
        """RPM Loader implementation.

        Args:
            storage: storage instance, forwarded to the parent loader
            url: Origin url (e.g. rpm://Fedora/packages/nginx)
            packages: versioned packages and associated artifacts, example::

                {
                    '34/Everything/1.18.0': {
                        'name': 'nginx',
                        'version': '1.18.0',
                        'release': 34,
                        'buildTime': '2022-11-01T12:00:55.764371',
                        'url': 'https://archives.fedoraproject.org/nginx-1.18.0-5.fc34.src.rpm',
                    },
                    # ...
                }

                NOTE(review, anlambert): with recent updates of the lister,
                the version is now of the form ``1.18.0-5.fc34``.
            kwargs: extra keyword arguments, forwarded to the parent loader
        """
        super().__init__(storage=storage, url=url, **kwargs)
        self.url = url
        self.packages = packages

    def get_versions(self) -> Sequence[str]:
        """Returns the keys of the packages input (e.g. 34/Everything/1.18.0, etc...)"""
        # TODO(review, anlambert): sort the versions, e.g.
        # ``sorted(self.packages, key=parse_version)`` with
        # ``from packaging.version import parse as parse_version``, and
        # implement ``get_default_version`` returning
        # ``self.get_versions()[-1]`` so the latest version is displayed by
        # default in the webapp.
        return list(self.packages)

    def get_package_info(self, version: str) -> Iterator[Tuple[str, RpmPackageInfo]]:
        """Yield the single ``(version, package info)`` pair of ``version``.

        Args:
            version: one of the keys of the packages input
        """
        yield (
            version,
            RpmPackageInfo.from_metadata(
                self.packages[version], self.origin.url, version
            ),
        )

    def uncompress(
        self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], dest: str
    ) -> str:
        """Extract the downloaded ``.rpm`` file below ``dest``.

        Args:
            dl_artifacts: downloaded artifacts; only the local path of the
                first one is used
            dest: directory to extract into

        Returns:
            The path returned by :func:`extract_rpm_package`.
        """
        rpm_path, _ = dl_artifacts[0]
        return extract_rpm_package(rpm_path, dest=dest)

    def build_release(
        self, p_info: RpmPackageInfo, uncompressed_path: str, directory: Sha1Git
    ) -> Optional[Release]:
        """Build the synthetic release targeting the loaded directory.

        Args:
            p_info: package info of the version being loaded
            uncompressed_path: path the package was extracted to (unused)
            directory: identifier of the directory the release targets

        Returns:
            A synthetic release named after the package version.
        """
        msg = (
            f"Synthetic release for Rpm source package {p_info.name} "
            f"version {p_info.version}\n"
        )
        return Release(
            # The release name must be the version: naming it after the
            # package would give every release of an origin the same name.
            name=p_info.version.encode(),
            # FIXME date isn't allowed without author. But
            # author field doesn't exist in RPM metadata (primary.xml file)
            # date=p_info.build_time,
            # TODO(review, vlorentz): use the empty author, like some other
            # package loaders do, so that build_time can be the release date.
            message=msg.encode(),
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
Not Done Inline ActionsThe release name should be the version here otherwise all releases will have the same name. anlambert: The release name should be the version here otherwise all releases will have the same name. | |||||
def extract_rpm_package(rpm_path: str, dest: str) -> str:
    """Extract an RPM package into ``<dest>/extracted``.

    The package payload is unpacked with an ``rpm2cpio | cpio`` pipeline whose
    output is written to ``<dest>/extract.log``. When the payload contains a
    ``<name>-<version>.tar.gz`` archive, that archive is uncompressed in place
    and then deleted.

    Args:
        rpm_path: local path of the ``.rpm`` file
        dest: existing directory receiving the ``extracted`` directory and the
            ``extract.log`` file

    Returns:
        The path of the directory holding the extracted files.

    Raises:
        FileNotFoundError: ``rpm_path`` does not exist
        ValueError: the extraction pipeline failed; the message embeds the
            content of the log file
    """
    from os import makedirs

    logger.debug("rpm path: %s", rpm_path)
    if not path.exists(rpm_path):
        raise FileNotFoundError(f"RPM package {rpm_path} not found")

    destdir = path.join(dest, "extracted")
    logfile = path.join(dest, "extract.log")
    logger.debug(
        "extract RPM source package %s in %s",
        rpm_path,
        destdir,
        extra={
            "swh_type": "rpm_extract",
            "swh_rpm": rpm_path,  # TODO what should be the key?
            "swh_destdir": destdir,
        },
    )

    # cpio is asked to change into destdir (-D); make sure it exists first.
    makedirs(destdir, exist_ok=True)

    with open(logfile, "w") as log:
        # Leaving the Popen context closes our end of the pipe and reaps
        # rpm2cpio, even when cpio fails or cannot be started.
        with subprocess.Popen(
            ("rpm2cpio", rpm_path), stdout=subprocess.PIPE, stderr=log
        ) as rpm2cpio:
            cpio_returncode = subprocess.call(
                ("cpio", "-idmv", "-D", destdir),
                stdin=rpm2cpio.stdout,
                stdout=log,
                stderr=log,
            )
        rpm2cpio_returncode = rpm2cpio.returncode

    if cpio_returncode != 0:
        failed_returncode = cpio_returncode
    elif rpm2cpio_returncode > 0:
        # A negative code means rpm2cpio was killed by a signal, which happens
        # when cpio stops reading before rpm2cpio is done writing; only a
        # regular error exit is reported.
        failed_returncode = rpm2cpio_returncode
    else:
        failed_returncode = 0

    if failed_returncode:
        with open(logfile, "r") as log:
            logdata = log.read()
        raise ValueError(
            "rpm2cpio | cpio exited with code %s: %s" % (failed_returncode, logdata)
        )

    # Extract .tar file obtained from .rpm
    # RPM file names follow <name>-<version>-<release>.<arch>.rpm and only the
    # name may contain hyphens, so split from the right.
    name_version_release = path.basename(rpm_path).rsplit("-", 2)
    if len(name_version_release) == 3:
        pkg_name, pkg_version, _ = name_version_release
        tar_path = path.join(destdir, f"{pkg_name}-{pkg_version}.tar.gz")
        if path.exists(tar_path):
            uncompress(tar_path, destdir)
            remove(tar_path)

    return destdir
Done Inline ActionsPlease suggest improvements in this :) KShivendu: Please suggest improvements in this :) | |||||
Done Inline ActionsAny suggestions on how I can test this? Coverage is 99% because of this. KShivendu: Any suggestions on how I can test this? Coverage is 99% because of this. | |||||
Not Done Inline Actionsdebug logs are not of interest to test in that case and 99% coverage is already quite good, do not bother with it. anlambert: debug logs are not of interest to test in that case and 99% coverage is already quite good, do… | |||||
missing copyright header