def register() -> Mapping[str, Any]:
    """Return the registration entry describing this worker module.

    The loader class is imported inside the function, so it is only loaded
    when the entry point is actually invoked.

    Returns:
        A mapping with two keys: ``task_modules``, a one-element list holding
        the dotted path of this package's ``tasks`` module, and ``loader``,
        the ``AurLoader`` class.
    """
    from .loader import AurLoader

    tasks_module = f"{__name__}.tasks"
    return {"task_modules": [tasks_module], "loader": AurLoader}
def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
    """Extract intrinsic metadata from the .SRCINFO file found under dir_path.

    Each Aur package has a .SRCINFO file at the root of the archive. The
    archive is expected to have been extracted as a single top-level
    directory inside ``dir_path``.

    Args:
        dir_path: A directory on disk where a package has been extracted

    Returns:
        A dict mapping each ``key = value`` entry of the .SRCINFO file.
        A key present once maps to its value as a string; a key present
        several times maps to the list of its values, in file order.

    Raises:
        ValueError: if ``dir_path`` does not contain exactly one directory
        OSError: if ``dir_path`` or the .SRCINFO file cannot be read
    """
    # The top directory of the extracted archive is expected to be the only
    # directory present (it is named after the package).
    subdirs = [entry.name for entry in dir_path.iterdir() if entry.is_dir()]
    if len(subdirs) != 1:
        raise ValueError(
            f"Expected exactly one top-level directory in {dir_path}, "
            f"found {len(subdirs)}: {sorted(subdirs)}"
        )
    srcinfo_path = dir_path / subdirs[0] / ".SRCINFO"

    # Decode explicitly as UTF-8: descriptions may hold non-ASCII characters
    # and the locale default encoding is not guaranteed to be UTF-8.
    srcinfo = srcinfo_path.read_text(encoding="utf-8")

    # Only leading indentation is skipped, so tabs inside a value are kept.
    # The separators around "=" are restricted to space/tab so that a match
    # can never run over a line break and swallow the following line.
    rex = re.compile(r"^[ \t]*(\w+)[ \t]=[ \t](.*)$", re.M)

    collected: Dict[str, List[str]] = {}
    for key, value in rex.findall(srcinfo):
        collected.setdefault(key, []).append(value)

    # Keys seen once are exposed as plain strings, repeated keys as lists.
    return {
        key: values[0] if len(values) == 1 else values
        for key, values in collected.items()
    }
def get_versions(self) -> Sequence[str]:
    """Get all released versions of an Aur package

    Versions are the keys of ``self.artifacts``. They are sorted component
    by component: numeric components compare as integers, other components
    compare as strings, and a string component sorts before a numeric one
    at the same position.

    Returns:
        A sequence of versions, oldest first

    Example::

        ["0.1.1", "0.10.2"]
    """

    def sort_key(version: str) -> List[Tuple[int, Any]]:
        # Same tokenisation as distutils' LooseVersion (digit runs, lowercase
        # letter runs, dots dropped), but every component is tagged with its
        # kind. LooseVersion compares raw ints with raw strs and raises
        # TypeError on versions such as "r255.30b7c5f" vs "0.0.1"; tagging
        # keeps the same order wherever LooseVersion works and never raises.
        key: List[Tuple[int, Any]] = []
        for chunk in re.split(r"(\d+|[a-z]+|\.)", version):
            if not chunk or chunk == ".":
                continue
            try:
                key.append((1, int(chunk)))
            except ValueError:
                key.append((0, chunk))
        return key

    return sorted(self.artifacts, key=sort_key)
@shared_task(name=__name__ + ".LoadAur")
def load_aur(**kwargs):
    """Load Arch User Repository packages

    All keyword arguments are forwarded unchanged to
    ``AurLoader.from_configfile``; the result of the loader's ``load()``
    call is returned as the task result.
    """
    loader = AurLoader.from_configfile(**kwargs)
    return loader.load()
+ +set -euo pipefail + +# Create directories +readonly TMP=tmp_dir/aur +readonly BASE_URL=https_aur.archlinux.org +readonly SNAPSHOT_PREFIX=cgit_aur.git_snapshot + +mkdir -p $TMP +mkdir -p $BASE_URL + +cd $TMP + +mkdir 'hg-evolve' +echo -e '''pkgbase = hg-evolve + pkgdesc = Flexible evolution of Mercurial history + pkgver = 10.5.2 + pkgrel = 1 + url = https://www.mercurial-scm.org/doc/evolution/ + arch = any + license = GPL2 + makedepends = python-build + makedepends = python-installer + makedepends = python-wheel + depends = mercurial + source = https://files.pythonhosted.org/packages/source/h/hg-evolve/hg-evolve-10.5.2.tar.gz + sha512sums = 81a1cc1202ffaf364fde70c6a36e32330e93aa69c9b9f7e11fbc11f988f7fb302d8b79414c644d274fedb7f0a67e10c4344c0206a1424f2bb97ae2cb11a51315 + +pkgname = hg-evolve +''' > hg-evolve/.SRCINFO + +mkdir 'ibus-git' +echo -e '''pkgbase = ibus-git + pkgdesc = Next Generation Input Bus for Linux + pkgver = 1.5.23+12+gef4c5c7e + pkgrel = 1 + url = https://github.com/ibus/ibus/wiki + arch = x86_64 + license = LGPL + makedepends = gobject-introspection + makedepends = vala + makedepends = intltool + makedepends = gnome-common + makedepends = gtk-doc + makedepends = gtk2 + makedepends = qt5-base + makedepends = unicode-cldr + makedepends = unicode-character-database + makedepends = unicode-emoji + makedepends = git + depends = dconf + depends = gtk3 + depends = hicolor-icon-theme + depends = libnotify + depends = python-dbus + depends = python-gobject + depends = iso-codes + depends = librsvg + options = !emptydirs + source = ibus::git+https://github.com/ibus/ibus + sha512sums = SKIP + +pkgname = ibus-git + depends = dconf + depends = gtk3 + depends = hicolor-icon-theme + depends = libnotify + depends = python-dbus + depends = python-gobject + depends = iso-codes + depends = librsvg + depends = libibus-git=1.5.23+12+gef4c5c7e + provides = ibus + conflicts = ibus + +pkgname = libibus-git + pkgdesc = IBus support library + depends = libglib-2.0.so + 
depends = libgobject-2.0.so + depends = libgio-2.0.so + provides = libibus + provides = libibus-1.0.so + conflicts = libibus +''' > ibus-git/.SRCINFO + +mkdir 'libervia-web-hg' +echo -e '''pkgbase = libervia-web-hg + pkgdesc = Salut à Toi, multi-frontends multi-purposes XMPP client (Web interface) + pkgver = 0.9.0.r1492.3a34d78f2717 + pkgrel = 1 + url = http://salut-a-toi.org/ + install = libervia-web-hg.install + arch = any + license = AGPL3 + makedepends = python-setuptools + makedepends = mercurial + depends = python + depends = python-jinja + depends = python-shortuuid-git + depends = libervia-media-hg + depends = libervia-backend-hg + depends = libervia-templates-hg + depends = python-zope-interface + depends = python-pyopenssl + depends = python-autobahn + depends = dbus + depends = python-brython + provides = libervia-web + options = !strip + source = hg+https://repos.goffi.org/libervia + md5sums = SKIP + +pkgname = libervia-web-hg +''' > libervia-web-hg/.SRCINFO + +mkdir 'tealdeer-git' +echo -e '''# Generated by mksrcinfo v8 +# Fri Sep 4 20:36:25 UTC 2020 +pkgbase = tealdeer-git + pkgdesc = A fast tldr client in Rust. + pkgver = r255.30b7c5f + pkgrel = 1 + url = https://github.com/dbrgn/tealdeer + arch = x86_64 + arch = armv6h + arch = armv7h + arch = aarch64 + license = MIT + license = Apache + makedepends = git + makedepends = rust + makedepends = cargo + depends = openssl + provides = tldr + conflicts = tldr + options = !emptydirs + source = git+https://github.com/dbrgn/tealdeer + sha256sums = SKIP + +pkgname = tealdeer-git +''' > tealdeer-git/.SRCINFO + +mkdir 'a-fake-one' +echo -e '''# This one does not exists +# For test purpose, in particular for multi keys, multi lines edge case +pkgbase = a-fake-one + pkgdesc = A first line of description. + pkgdesc = A second line for more information. 
+ pkgver = 0.0.1 + pkgrel = 1 + url = https://nowhere/a-fake-one + url = https://mirror/a-fake-one + arch = x86_64 + arch = armv6h + arch = armv7h + arch = aarch64 + license = MIT + license = Apache + makedepends = git + makedepends = rust + makedepends = cargo + depends = openssl + provides = a-fake-one + conflicts = a-fake-one + options = !emptydirs + source = git+https://nowhere/a-fake-one + sha256sums = SKIP + +pkgname = a-fake-one +''' > a-fake-one/.SRCINFO + +# Compress packages folders to .tar.gz archives +tar -czf ${SNAPSHOT_PREFIX}_hg-evolve.tar.gz hg-evolve +tar -czf ${SNAPSHOT_PREFIX}_ibus-git.tar.gz ibus-git +tar -czf ${SNAPSHOT_PREFIX}_libervia-web-hg.tar.gz libervia-web-hg +tar -czf ${SNAPSHOT_PREFIX}_tealdeer-git.tar.gz tealdeer-git +tar -czf ${SNAPSHOT_PREFIX}_a-fake-one.tar.gz a-fake-one + +# Move .tar.gz archives to a servable directory +mv *.tar.gz ../../$BASE_URL + +# Clean up removing tmp_dir +cd ../../ +rm -r tmp_dir/ diff --git a/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_a-fake-one.tar.gz b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_a-fake-one.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@