Changeset View
Standalone View
swh/loader/package/rpm/loader.py
- This file was added.
from datetime import datetime | |||||
ardumont: missing copyright header | |||||
Not Done Inline Actions — anlambert: Copyright (C) 2022
import logging | |||||
from os import path, remove | |||||
import subprocess | |||||
from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple | |||||
import attr | |||||
from swh.core.tarball import uncompress | |||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | |||||
from swh.model.model import ObjectType, Release, Sha1Git | |||||
from swh.storage.interface import StorageInterface | |||||
logger = logging.getLogger(__name__) | |||||
@attr.s
class RpmPackageInfo(BasePackageInfo):
    """Description of one RPM source package artifact handled by the loader.

    Adds the raw metadata dictionary, the package name and the parsed build
    time on top of the fields inherited from ``BasePackageInfo``.
    """

    # Metadata dictionary the instance was built from, kept unmodified.
    # TODO: `a_metadata` should include fedora edition too?
    raw_info = attr.ib(type=Dict[str, Any])
    # Package name, e.g. "nginx".
    name = attr.ib(type=str)
    # Parsed from the "buildTime" metadata entry; None when not supplied.
    build_time = attr.ib(type=Optional[datetime], default=None)

    # Open review notes carried over from the changeset discussion:
    # - anlambert: a field was flagged as unused and removable (the referent
    #   is not identifiable from this view).
    # - anlambert: prefer string.Template("$name $version $build_time") as
    #   manifest format.
    # - anlambert: the `origin` parameter of `from_metadata` is not used and
    #   could be removed (kept here because callers still pass it).
    # - anlambert: with `from __future__ import annotations` at the top of
    #   the file, the quotes around the return annotation can be dropped.

    @classmethod
    def from_metadata(
        cls, a_metadata: Dict[str, Any], origin: str, version: str
    ) -> "RpmPackageInfo":
        """Build a package info object from one entry of the packages mapping.

        Args:
            a_metadata: metadata of a single package version; the keys
                ``url``, ``name`` and ``buildTime`` (ISO 8601 string) are read
            origin: origin url; currently unused
            version: key under which ``a_metadata`` is stored
                (e.g. ``34/Everything/1.18.0``)

        Returns:
            The populated ``RpmPackageInfo``.

        Raises:
            KeyError: if a required key is missing from ``a_metadata``
            AssertionError: if the url does not point to a ``.rpm`` file
            ValueError: if ``buildTime`` is not a valid ISO 8601 string
        """
        url = a_metadata["url"]  # url of the .rpm file
        filename = url.split("/")[-1]  # nginx-1.18.0-5.fc34.src.rpm
        assert filename.endswith(".rpm"), f"not an RPM artifact: {url}"
        return cls(
            url=url,
            filename=filename,
            version=version,
            build_time=datetime.fromisoformat(a_metadata["buildTime"]),
            raw_info=a_metadata,
            name=a_metadata["name"],  # nginx
        )
class RpmLoader(PackageLoader[RpmPackageInfo]):
    """Package loader for RPM source packages (visit type ``rpm``)."""

    visit_type = "rpm"

    def __init__(
        self,
        storage: StorageInterface,
        url: str,
        packages: Mapping[str, Any],
        **kwargs: Any,
    ):
        """RPM Loader implementation.

        Args:
            storage: storage instance, forwarded to the parent loader
            url: Origin url (e.g. rpm://Fedora/packages/nginx)
            packages: versioned packages and associated artifacts, example::

                {
                    '34/Everything/1.18.0': {
                        'name': 'nginx',
                        'version': '1.18.0',
                        'release': 34,
                        'buildTime': '2022-11-01T12:00:55.764371',
                        'url': 'https://archives.fedoraproject.org/nginx-1.18.0-5.fc34.src.rpm',
                    },
                    # ...
                }

            kwargs: extra arguments forwarded to the parent loader

        Review notes: the edition part of the key can be one of
        ``["Everything", "Server", "Workstation", "Modular"]`` (KShivendu);
        with recent updates of the lister, version is now ``1.18.0-5.fc34``
        (anlambert) -- the example above may need refreshing accordingly.
        """
        super().__init__(storage=storage, url=url, **kwargs)
        self.url = url
        self.packages = packages

    def get_versions(self) -> Sequence[str]:
        """Returns the keys of the packages input (e.g. 34/Everything/1.18.0, etc...)"""
        # TODO(review, anlambert): sort the versions just in case, e.g.
        # list(sorted(self.packages, key=parse_version)) with
        # `from packaging.version import parse as parse_version`.
        return list(self.packages)

    # TODO(review, anlambert): also implement `get_default_version` (returning
    # `self.get_versions()[-1]`) to have the latest version displayed by
    # default in the webapp.

    def get_package_info(self, version: str) -> Iterator[Tuple[str, RpmPackageInfo]]:
        """Yield the single ``(version, package info)`` pair for ``version``.

        Raises:
            KeyError: if ``version`` is not a key of the packages input
        """
        yield (
            version,
            RpmPackageInfo.from_metadata(
                self.packages[version], self.origin.url, version
            ),
        )

    def uncompress(
        self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], dest: str
    ) -> str:
        """Extract the first downloaded artifact into ``dest``.

        Only the path of the first ``(path, info)`` pair is used; the result
        of ``extract_rpm_package`` is returned as is.
        """
        rpm_path, _ = dl_artifacts[0]
        return extract_rpm_package(rpm_path, dest=dest)

    def build_release(
        self, p_info: RpmPackageInfo, uncompressed_path: str, directory: Sha1Git
    ) -> Optional[Release]:
        """Build the synthetic release pointing at the loaded directory.

        The release is named after the package *version* so that the
        different versions of one package get distinct release names.
        """
        msg = (
            f"Synthetic release for Rpm source package {p_info.name} "
            f"version {p_info.version}\n"
        )
        return Release(
            # Naming the release after the package would give every version
            # of that package the same release name (review, anlambert).
            name=p_info.version.encode(),
            # FIXME date isn't allowed without author. But
            # author field doesn't exist in RPM metadata (primary.xml file).
            # Review (vlorentz): use the empty author, like some other package
            # loaders do, so that p_info.build_time can be set as the date.
            message=msg.encode(),
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
def _source_tarball_stem(rpm_filename: str) -> str:
    """Return the ``name-version`` prefix of an RPM file name."""
    # RPM file names follow name-version-release.arch.rpm; only the name may
    # contain hyphens, so version and release are split off from the right.
    pieces = rpm_filename.rsplit("-", 2)
    if len(pieces) == 3:
        return "-".join(pieces[:2])
    # Fewer than two hyphens: keep the historical left-to-right guess.
    return "-".join(rpm_filename.split("-")[:2])


def extract_rpm_package(rpm_path: str, dest: str) -> str:
    """Extracts an RPM package.

    The package payload is unpacked with ``rpm2cpio | cpio`` into
    ``<dest>/extracted``; the output of both commands is written to
    ``<dest>/extract.log``. When the payload contains a
    ``<name>-<version>.tar.gz`` tarball, it is uncompressed in place and the
    tarball itself is removed.

    Args:
        rpm_path: path to the ``.rpm`` file
        dest: working directory receiving the extracted tree and the log file

    Returns:
        Path of the directory holding the extracted files.

    Raises:
        FileNotFoundError: if ``rpm_path`` does not exist
        ValueError: if ``cpio`` exits with a non-zero status
    """
    logger.debug("rpm path: %s", rpm_path)
    if not path.exists(rpm_path):
        raise FileNotFoundError(f"RPM package {rpm_path} not found")

    destdir = path.join(dest, "extracted")
    logfile = path.join(dest, "extract.log")
    logger.debug(
        "extract RPM source package %s in %s",
        rpm_path,
        destdir,
        extra={
            "swh_type": "rpm_extract",
            "swh_rpm": rpm_path,  # TODO what should be the key?
            "swh_destdir": destdir,
        },
    )

    try:
        # Popen as a context manager closes the pipe and reaps rpm2cpio even
        # when cpio fails; stderr of both commands ends up in the log file.
        with open(logfile, "w") as log, subprocess.Popen(
            ("rpm2cpio", rpm_path), stdout=subprocess.PIPE, stderr=log
        ) as rpm2cpio:
            subprocess.check_call(
                ("cpio", "-idmv", "-D", destdir),
                stdin=rpm2cpio.stdout,
                stdout=log,
                stderr=log,
            )
    except subprocess.CalledProcessError as e:
        with open(logfile, "r") as log:
            logdata = log.read()
        raise ValueError(
            "rpm2cpio | cpio exited with code %s: %s" % (e.returncode, logdata)
        ) from None

    # Extract .tar file obtained from .rpm
    rpm_filename = rpm_path.split("/")[-1]
    tar_path = destdir + "/" + _source_tarball_stem(rpm_filename) + ".tar.gz"
    if path.exists(tar_path):
        uncompress(tar_path, destdir)
        remove(tar_path)

    return destdir
Done Inline Actions — KShivendu: Please suggest improvements in this :)
Done Inline Actions — KShivendu: Any suggestions on how I can test this? Coverage is 99% because of this.
Not Done Inline Actions — anlambert: debug logs are not of interest to test in that case and 99% coverage is already quite good, do not bother with it.
missing copyright header