diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst index bdebfe2..1ec6bcc 100644 --- a/docs/package-loader-specifications.rst +++ b/docs/package-loader-specifications.rst @@ -1,142 +1,151 @@ .. _package-loader-specifications: Package loader specifications ============================= Release fields -------------- Here is an overview of the fields (+ internal version name + branch name) used by each package loader, after D6616: .. list-table:: Fields used by each package loader :header-rows: 1 * - Loader - internal version - branch name - name - message - synthetic - author - date - Notes * - arch - ``p_info.​version`` - ``release_name(​version, filename)`` - =version - Synthetic release for Arch Linux source package {p_info.name} version {p_info.version} {description} - true - from intrinsic metadata - from extra_loader_arguments['artifacts'] - Intrinsic metadata extracted from .PKGINFO file of the package * - archive - passed as arg - ``release_name(​version)`` - =version - "Synthetic release for archive at {p_info.url}\n" - true - "" - passed as arg - + * - aur + - ``p_info.​version`` + - ``release_name(​version, filename)`` + - =version + - Synthetic release for Aur source package {p_info.name} version {p_info.version} {description} + - true + - "" + - from extra_loader_arguments['aur_metadata'] + - Intrinsic metadata extracted from .SRCINFO file of the package * - cran - ``metadata.get(​"Version", passed as arg)`` - ``release_name(​version)`` - =version - standard message - true - ``metadata.get(​"Maintainer", "")`` - ``metadata.get(​"Date")`` - metadata is intrinsic * - crates - ``p_info.​version`` - ``release_name(​version, filename) + "\n\n" + i_metadata.description + "\n"`` - =version - Synthetic release for Crate source package {p_info.name} version {p_info.version} {description} - true - from int metadata - from ext metadata - ``i_metadata`` for intrinsic metadata, ``e_metadata`` for extrinsic metadata * - debian 
- =``version`` - ``release_name(​version)`` - =``i_version`` - standard message (using ``i_version``) - true - ``metadata​.changelog​.person`` - ``metadata​.changelog​.date`` - metadata is intrinsic. Old revisions have ``dsc`` as type ``i_version`` is the intrinsic version (eg. ``0.7.2-3``) while ``version`` contains the debian suite name (eg. ``stretch/contrib/0.7.2-3``) and is passed as arg * - deposit - HEAD - only HEAD - HEAD - "{client}: Deposit {id} in collection {collection}\n" - true - original author - ```` from SWORD XML - revisions had parents * - maven-loader - passed as arg - HEAD - ``release_name(version)`` - "Synthetic release for archive at {p_info.url}\n" - true - "" - passed as arg - Only one artefact per url (jar/zip src) * - nixguix - URL - URL - URL - None - true - "" - None - it's the URL of the artifact referenced by the derivation * - npm - ``metadata​["version"]`` - ``release_name(​version)`` - =version - standard message - true - from int metadata or "" - from ext metadata or None - * - opam - as given by opam - "{opam_package}​.{version}" - =version - standard message - true - from metadata - None - "{self.opam_package}​.{version}" matches the version names used by opam's backend. metadata is extrinsic * - pypi - ``metadata​["version"]`` - ``release_name(​version)`` or ``release_name(​version, filename)`` - =version - ``metadata[​'comment_text']}`` or standard message - true - from int metadata or "" - from ext metadata or None - metadata is intrinsic using this function:: def release_name(version: str, filename: Optional[str] = None) -> str: if filename: return "releases/%s/%s" % (version, filename) return "releases/%s" % version and "standard message" being:: msg = ( f"Synthetic release for {PACKAGE_MANAGER} source package {name} " f"version {version}\n" ) The ``target_type`` field is always ``dir``, and the target the id of a directory loaded by unpacking a tarball/zip file/... 
def parse_requirements(name=None):
    """Collect requirement specifiers from a requirements file.

    The file is looked up relative to the current working directory:
    ``requirements-<name>.txt`` when ``name`` is truthy, ``requirements.txt``
    otherwise.

    Args:
        name: optional suffix selecting a specific requirements file

    Returns:
        The stripped, non-empty, non-comment lines of the file in file order,
        or an empty list when the file does not exist.
    """
    reqf = "requirements-%s.txt" % name if name else "requirements.txt"

    # A missing file is not an error: it simply contributes no requirement.
    if not path.exists(reqf):
        return []

    with open(reqf) as f:
        stripped_lines = (raw.strip() for raw in f)
        # Blank lines and "#" comment lines carry no requirement.
        return [
            entry
            for entry in stripped_lines
            if entry and not entry.startswith("#")
        ]
def register() -> Mapping[str, Any]:
    """Describe this worker module (referenced by the ``swh.workers`` entry point).

    Returns:
        A mapping with two entries: ``"task_modules"``, a one-element list
        holding the dotted path of this package's ``tasks`` module, and
        ``"loader"``, the ``AurLoader`` class.
    """
    # The loader class is only imported when the registration is requested.
    from .loader import AurLoader

    tasks_module = "%s.tasks" % __name__
    return dict(task_modules=[tasks_module], loader=AurLoader)
def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
    """Extract intrinsic metadata from the .SRCINFO file found under dir_path.

    The extracted archive is expected to contain exactly one top-level
    directory (named after the package) holding a .SRCINFO file made of
    ``key = value`` lines.

    Args:
        dir_path: A directory on disk where a package has been extracted

    Returns:
        A dict mapping each .SRCINFO key to its value. A key that occurs once
        maps to a string; a key that occurs several times maps to the list of
        all its values, in file order. Lines that are not of the form
        ``key = value`` (comments, blank lines) are ignored.

    Raises:
        FileNotFoundError: if dir_path or the .SRCINFO file does not exist
        ValueError: if dir_path does not contain exactly one directory
    """
    # Explicit check instead of ``assert``: assertions vanish under ``python -O``.
    if not dir_path.exists():
        raise FileNotFoundError(f"No such directory: {dir_path}")

    # top directory from extracted archive is always named with the package name
    (pkgname,) = [elt.name for elt in dir_path.iterdir() if elt.is_dir()]
    srcinfo_path = Path(dir_path, pkgname, ".SRCINFO")
    rex = re.compile(r"^(\w+)\s=\s(.*)$", re.M)

    # Decode explicitly as UTF-8: descriptions may hold non-ASCII text and the
    # locale's default encoding is not guaranteed to cope with it.
    with srcinfo_path.open("r", encoding="utf-8") as content:
        # Except first and last line, lines may start with a tab, remove them
        srcinfo = content.read().replace("\t", "")

    data: Dict[str, Any] = {}
    for key, value in rex.findall(srcinfo):
        if key not in data:
            data[key] = value
        elif isinstance(data[key], list):
            data[key].append(value)
        else:
            # Second occurrence of the key: promote the single value to a list.
            data[key] = [data[key], value]
    return data
def get_versions(self) -> Sequence[str]:
    """Get all released versions of an Aur package, oldest first

    Versions are the keys of ``self.artifacts``. They are ordered
    component-wise: each version is split into runs of digits, runs of
    lowercase letters and whatever separates them (dots are dropped); digit
    runs compare as integers, everything else as strings.

    This yields the same order ``distutils.version.LooseVersion`` gave, but
    does not depend on the deprecated ``distutils`` package and never raises
    when a numeric component meets a textual one (e.g. "r255.30b7c5f-1"
    against "1.0-1"): textual components sort before numeric ones.
    NOTE(review): that tie-break follows the usual vercmp convention; this is
    not a full pacman ``vercmp`` (no epoch handling) -- confirm it is enough.

    Returns:
        A sequence of versions

    Example::

        ["0.1.1", "0.10.2"]
    """
    component_re = re.compile(r"(\d+|[a-z]+|\.)")

    def sort_key(version: str) -> List[Tuple[int, Any]]:
        # Tag every component with its kind so that a str is never compared
        # with an int: (0, text) always sorts before (1, number).
        key: List[Tuple[int, Any]] = []
        for part in component_re.split(version):
            if not part or part == ".":
                continue
            if part.isdecimal():
                key.append((1, int(part)))
            else:
                key.append((0, part))
        return key

    return sorted(self.artifacts, key=sort_key)
@shared_task(name=f"{__name__}.LoadAur")
def load_aur(**kwargs):
    """Celery task loading an Arch User Repository package.

    Every keyword argument is forwarded unchanged to
    ``AurLoader.from_configfile``; the value returned by the resulting
    loader's ``load()`` call is the task result.
    """
    loader = AurLoader.from_configfile(**kwargs)
    return loader.load()
+ +set -euo pipefail + +# Create directories +readonly TMP=tmp_dir/aur +readonly BASE_URL=https_aur.archlinux.org +readonly SNAPSHOT_PREFIX=cgit_aur.git_snapshot + +mkdir -p $TMP +mkdir -p $BASE_URL + +cd $TMP + +mkdir 'hg-evolve' +echo -e '''pkgbase = hg-evolve + pkgdesc = Flexible evolution of Mercurial history + pkgver = 10.5.2 + pkgrel = 1 + url = https://www.mercurial-scm.org/doc/evolution/ + arch = any + license = GPL2 + makedepends = python-build + makedepends = python-installer + makedepends = python-wheel + depends = mercurial + source = https://files.pythonhosted.org/packages/source/h/hg-evolve/hg-evolve-10.5.2.tar.gz + sha512sums = 81a1cc1202ffaf364fde70c6a36e32330e93aa69c9b9f7e11fbc11f988f7fb302d8b79414c644d274fedb7f0a67e10c4344c0206a1424f2bb97ae2cb11a51315 + +pkgname = hg-evolve +''' > hg-evolve/.SRCINFO + +mkdir 'ibus-git' +echo -e '''pkgbase = ibus-git + pkgdesc = Next Generation Input Bus for Linux + pkgver = 1.5.23+12+gef4c5c7e + pkgrel = 1 + url = https://github.com/ibus/ibus/wiki + arch = x86_64 + license = LGPL + makedepends = gobject-introspection + makedepends = vala + makedepends = intltool + makedepends = gnome-common + makedepends = gtk-doc + makedepends = gtk2 + makedepends = qt5-base + makedepends = unicode-cldr + makedepends = unicode-character-database + makedepends = unicode-emoji + makedepends = git + depends = dconf + depends = gtk3 + depends = hicolor-icon-theme + depends = libnotify + depends = python-dbus + depends = python-gobject + depends = iso-codes + depends = librsvg + options = !emptydirs + source = ibus::git+https://github.com/ibus/ibus + sha512sums = SKIP + +pkgname = ibus-git + depends = dconf + depends = gtk3 + depends = hicolor-icon-theme + depends = libnotify + depends = python-dbus + depends = python-gobject + depends = iso-codes + depends = librsvg + depends = libibus-git=1.5.23+12+gef4c5c7e + provides = ibus + conflicts = ibus + +pkgname = libibus-git + pkgdesc = IBus support library + depends = libglib-2.0.so + 
depends = libgobject-2.0.so + depends = libgio-2.0.so + provides = libibus + provides = libibus-1.0.so + conflicts = libibus +''' > ibus-git/.SRCINFO + +mkdir 'libervia-web-hg' +echo -e '''pkgbase = libervia-web-hg + pkgdesc = Salut à Toi, multi-frontends multi-purposes XMPP client (Web interface) + pkgver = 0.9.0.r1492.3a34d78f2717 + pkgrel = 1 + url = http://salut-a-toi.org/ + install = libervia-web-hg.install + arch = any + license = AGPL3 + makedepends = python-setuptools + makedepends = mercurial + depends = python + depends = python-jinja + depends = python-shortuuid-git + depends = libervia-media-hg + depends = libervia-backend-hg + depends = libervia-templates-hg + depends = python-zope-interface + depends = python-pyopenssl + depends = python-autobahn + depends = dbus + depends = python-brython + provides = libervia-web + options = !strip + source = hg+https://repos.goffi.org/libervia + md5sums = SKIP + +pkgname = libervia-web-hg +''' > libervia-web-hg/.SRCINFO + +mkdir 'tealdeer-git' +echo -e '''# Generated by mksrcinfo v8 +# Fri Sep 4 20:36:25 UTC 2020 +pkgbase = tealdeer-git + pkgdesc = A fast tldr client in Rust. + pkgver = r255.30b7c5f + pkgrel = 1 + url = https://github.com/dbrgn/tealdeer + arch = x86_64 + arch = armv6h + arch = armv7h + arch = aarch64 + license = MIT + license = Apache + makedepends = git + makedepends = rust + makedepends = cargo + depends = openssl + provides = tldr + conflicts = tldr + options = !emptydirs + source = git+https://github.com/dbrgn/tealdeer + sha256sums = SKIP + +pkgname = tealdeer-git +''' > tealdeer-git/.SRCINFO + +mkdir 'a-fake-one' +echo -e '''# This one does not exists +# For test purpose, in particular for multi keys, multi lines edge case +pkgbase = a-fake-one + pkgdesc = A first line of description. + pkgdesc = A second line for more information. 
+ pkgver = 0.0.1 + pkgrel = 1 + url = https://nowhere/a-fake-one + url = https://mirror/a-fake-one + arch = x86_64 + arch = armv6h + arch = armv7h + arch = aarch64 + license = MIT + license = Apache + makedepends = git + makedepends = rust + makedepends = cargo + depends = openssl + provides = a-fake-one + conflicts = a-fake-one + options = !emptydirs + source = git+https://nowhere/a-fake-one + sha256sums = SKIP + +pkgname = a-fake-one +''' > a-fake-one/.SRCINFO + +# Compress packages folders to .tar.gz archives +tar -czf ${SNAPSHOT_PREFIX}_hg-evolve.tar.gz hg-evolve +tar -czf ${SNAPSHOT_PREFIX}_ibus-git.tar.gz ibus-git +tar -czf ${SNAPSHOT_PREFIX}_libervia-web-hg.tar.gz libervia-web-hg +tar -czf ${SNAPSHOT_PREFIX}_tealdeer-git.tar.gz tealdeer-git +tar -czf ${SNAPSHOT_PREFIX}_a-fake-one.tar.gz a-fake-one + +# Move .tar.gz archives to a servable directory +mv *.tar.gz ../../$BASE_URL + +# Clean up removing tmp_dir +cd ../../ +rm -r tmp_dir/ diff --git a/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_a-fake-one.tar.gz b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_a-fake-one.tar.gz new file mode 100644 index 0000000..f193cc6 Binary files /dev/null and b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_a-fake-one.tar.gz differ diff --git a/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_hg-evolve.tar.gz b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_hg-evolve.tar.gz new file mode 100644 index 0000000..d95aa4e Binary files /dev/null and b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_hg-evolve.tar.gz differ diff --git a/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_ibus-git.tar.gz b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_ibus-git.tar.gz new file mode 100644 index 0000000..99cb97b Binary files 
/dev/null and b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_ibus-git.tar.gz differ diff --git a/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_libervia-web-hg.tar.gz b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_libervia-web-hg.tar.gz new file mode 100644 index 0000000..a02d15d Binary files /dev/null and b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_libervia-web-hg.tar.gz differ diff --git a/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_tealdeer-git.tar.gz b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_tealdeer-git.tar.gz new file mode 100644 index 0000000..4ff29c4 Binary files /dev/null and b/swh/loader/package/aur/tests/data/https_aur.archlinux.org/cgit_aur.git_snapshot_tealdeer-git.tar.gz differ diff --git a/swh/loader/package/aur/tests/test_aur.py b/swh/loader/package/aur/tests/test_aur.py new file mode 100644 index 0000000..86e69ee --- /dev/null +++ b/swh/loader/package/aur/tests/test_aur.py @@ -0,0 +1,276 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information +from swh.loader.package.aur.loader import AurLoader +from swh.loader.package.utils import EMPTY_AUTHOR +from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats +from swh.model.hashutil import hash_to_bytes +from swh.model.model import ( + ObjectType, + Release, + Snapshot, + SnapshotBranch, + TargetType, + TimestampWithTimezone, +) + +EXPECTED_PACKAGES = [ + { + "url": "https://aur.archlinux.org/hg-evolve.git", + "artifacts": [ + { + "filename": "hg-evolve.tar.gz", + "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/hg-evolve.tar.gz", # noqa: B950 + "version": "10.5.2-1", 
+ } + ], + "aur_metadata": [ + { + "version": "10.5.2-1", + "project_url": "https://www.mercurial-scm.org/doc/evolution/", + "last_update": "2022-07-16T00:08:41+00:00", + "pkgname": "hg-evolve", + } + ], + }, + { + "url": "https://aur.archlinux.org/ibus-git.git", + "artifacts": [ + { + "filename": "ibus-git.tar.gz", + "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/ibus-git.tar.gz", # noqa: B950 + "version": "1.5.23+12+gef4c5c7e-1", + } + ], + "aur_metadata": [ + { + "version": "1.5.23+12+gef4c5c7e-1", + "project_url": "https://github.com/ibus/ibus/wiki", + "last_update": "2021-02-08T06:12:11+00:00", + "pkgname": "ibus-git", + } + ], + }, + { + "url": "https://aur.archlinux.org/libervia-web-hg.git", + "artifacts": [ + { + "filename": "libervia-web-hg.tar.gz", + "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/libervia-web-hg.tar.gz", # noqa: B950 + "version": "0.9.0.r1492.3a34d78f2717-1", + } + ], + "aur_metadata": [ + { + "version": "0.9.0.r1492.3a34d78f2717-1", + "project_url": "http://salut-a-toi.org/", + "last_update": "2022-02-26T15:30:58+00:00", + "pkgname": "libervia-web-hg", + } + ], + }, + { + "url": "https://aur.archlinux.org/tealdeer-git.git", + "artifacts": [ + { + "filename": "tealdeer-git.tar.gz", + "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/tealdeer-git.tar.gz", # noqa: B950 + "version": "r255.30b7c5f-1", + } + ], + "aur_metadata": [ + { + "version": "r255.30b7c5f-1", + "project_url": "https://github.com/dbrgn/tealdeer", + "last_update": "2020-09-04T20:36:52+00:00", + "pkgname": "tealdeer-git", + } + ], + }, + { + "url": "https://aur.archlinux.org/a-fake-one.git", + "artifacts": [ + { + "filename": "a-fake-one.tar.gz", + "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/a-fake-one.tar.gz", # noqa: B950 + "version": "0.0.1", + }, + ], + "aur_metadata": [ + { + "version": "0.0.1", + "project_url": "https://nowhere/a-fake-one", + "last_update": "2022-02-02T22:22:22+00:00", + "pkgname": "a-fake-one", + } + ], + }, +] + + 
def test_get_versions(swh_storage):
    """The loader lists the single version declared for the first package."""
    package = EXPECTED_PACKAGES[0]
    loader = AurLoader(
        swh_storage,
        url=package["url"],
        artifacts=package["artifacts"],
        aur_metadata=package["aur_metadata"],
    )

    versions = loader.get_versions()

    assert versions == ["10.5.2-1"]
b"10.5.2-1\n\nFlexible evolution of Mercurial history\n", + target=hash_to_bytes("cc4079be57e7cc0dbf2ecc76c81f5d84782ba632"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=EMPTY_AUTHOR, + date=TimestampWithTimezone.from_iso8601("2022-07-16T00:08:41+00:00"), + id=hash_to_bytes(expected_release_id), + ) + + assert_last_visit_matches( + swh_storage, + url=EXPECTED_PACKAGES[0]["url"], + status="full", + type="aur", + snapshot=expected_snapshot.id, + ) + + +def test_aur_loader_load_expected_packages(datadir, requests_mock_datadir, swh_storage): + # Exclude the last 'fake' package from EXPECTED_PACKAGES + for package in EXPECTED_PACKAGES[:-1]: + loader = AurLoader( + swh_storage, + url=package["url"], + artifacts=package["artifacts"], + aur_metadata=package["aur_metadata"], + ) + actual_load_status = loader.load() + assert actual_load_status["status"] == "eventful" + assert actual_load_status["snapshot_id"] is not None + + stats = get_stats(swh_storage) + assert { + "content": 1 + 1 + 1 + 1, + "directory": 2 + 2 + 2 + 2, + "origin": 1 + 1 + 1 + 1, + "origin_visit": 1 + 1 + 1 + 1, + "release": 1 + 1 + 1 + 1, + "revision": 0, + "skipped_content": 0, + "snapshot": 1 + 1 + 1 + 1, + } == stats + + +def test_aur_invalid_origin_not_found(swh_storage, requests_mock_datadir): + url = "http://nowhere/packages/42.git" + loader = AurLoader( + swh_storage, + url, + artifacts=[ + { + "version": "0.0.1", + "url": "https://myforge.nowhere/42/42.tar.gz", + "filename": "42.tar.gz", + }, + ], + aur_metadata=[ + { + "pkgname": "42", + "version": "0.0.1", + "project_url": "https://myforge.nowhere/42", + "last_update": "2022-04-07T21:08:14", + }, + ], + ) + + load_status = loader.load() + assert load_status["status"] == "uneventful" + + +def test_aur_parse_srcinfo(swh_storage, requests_mock_datadir): + """Ensure that multiple lines of `pkgdesc` in .SRCINFO results in `description` + string""" + + loader = AurLoader( + swh_storage, + url=EXPECTED_PACKAGES[-1]["url"], + 
def test_tasks_aur_loader(
    mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
):
    """Sending the LoadAur task runs the (mocked) loader and returns its status."""
    mock_load = mocker.patch("swh.loader.package.aur.loader.AurLoader.load")
    mock_load.return_value = {"status": "eventful"}

    # Keyword arguments handed to the task, i.e. the loader's constructor input.
    artifact = {
        "filename": "some-package.tar.gz",
        "url": "https://somewhere/some-package.tar.gz",
        "version": "0.0.1",
    }
    metadata = {
        "version": "0.0.1",
        "project_url": "https://somewhere/some-package",
        "last_update": "1970-01-01T21:08:14",
        "pkgname": "some-package",
    }
    task_kwargs = {
        "url": "https://somewhere/some-package.git",
        "artifacts": [artifact],
        "aur_metadata": [metadata],
    }

    res = swh_scheduler_celery_app.send_task(
        "swh.loader.package.aur.tasks.LoadAur",
        kwargs=task_kwargs,
    )

    assert res
    res.wait()
    assert res.successful()
    assert mock_load.called
    assert res.result == {"status": "eventful"}