Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123613
D7995.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
20 KB
Subscribers
None
D7995.diff
View Options
diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -20,6 +20,15 @@
- author
- date
- Notes
+ * - arch
+ - ``p_info.version``
+ - ``release_name(version, filename)``
+ - =version
+ - Synthetic release for Arch Linux source package {p_info.name} version {p_info.version} {description}
+ - true
+ - from intrinsic metadata
+ - from extra_loader_arguments['artifacts']
+ - Intrinsic metadata extracted from .PKGINFO file of the package
* - archive
- passed as arg
- ``release_name(version)``
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
-swh.core >= 0.3
+swh.core >= 2.12
swh.model >= 4.4.0
swh.objstorage >= 0.2.2
swh.scheduler >= 0.4.0
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -55,6 +55,7 @@
[swh.cli.subcommands]
loader=swh.loader.cli
[swh.workers]
+ loader.arch=swh.loader.package.arch:register
loader.archive=swh.loader.package.archive:register
loader.cran=swh.loader.package.cran:register
loader.crates=swh.loader.package.crates:register
diff --git a/swh/loader/package/arch/__init__.py b/swh/loader/package/arch/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/arch/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+ """Register the current worker module's definition"""
+ from .loader import ArchLoader
+
+ return {
+ "task_modules": [f"{__name__}.tasks"],
+ "loader": ArchLoader,
+ }
diff --git a/swh/loader/package/arch/loader.py b/swh/loader/package/arch/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/arch/loader.py
@@ -0,0 +1,136 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+from distutils.version import LooseVersion
+from pathlib import Path
+import re
+from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
+
+import attr
+
+from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.utils import release_name
+from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
+from swh.storage.interface import StorageInterface
+
+
+@attr.s
+class ArchPackageInfo(BasePackageInfo):
+
+ name = attr.ib(type=str)
+ """Name of the package"""
+
+ version = attr.ib(type=str)
+ """Current version"""
+
+ last_modified = attr.ib(type=str)
+ """File last modified date as release date"""
+
+
+def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
+ """Extract intrinsic metadata from .PKGINFO file at dir_path.
+
+ Each Arch Linux package has a .PKGINFO file at the root of the archive.
+
+ Args:
+ dir_path: A directory on disk where a package has been extracted
+
+ Returns:
+ A dict mapping each lowercased .PKGINFO key to its value
+ """
+ pkginfo_path = Path(dir_path, ".PKGINFO")
+ rex = re.compile(r"^(\w+)\s=\s(.*)$", re.M)
+ with pkginfo_path.open("rb") as content:
+ parsed = rex.findall(content.read().decode())
+ data = {entry[0].lower(): entry[1] for entry in parsed}
+ if "url" in data.keys():
+ data["project_url"] = data["url"]
+ return data
+
+
+class ArchLoader(PackageLoader[ArchPackageInfo]):
+ visit_type = "arch"
+
+ def __init__(
+ self,
+ storage: StorageInterface,
+ url: str,
+ artifacts: List[Dict[str, Any]],
+ **kwargs,
+ ):
+
+ super().__init__(storage=storage, url=url, **kwargs)
+ self.url = url
+ self.artifacts: Dict[str, Dict] = {
+ artifact["version"]: artifact for artifact in artifacts
+ }
+
+ def get_versions(self) -> Sequence[str]:
+ """Get all released versions of an Arch Linux package
+
+ Returns:
+ A sequence of versions
+
+ Example::
+
+ ["0.1.1", "0.10.2"]
+ """
+ versions = list(self.artifacts.keys())
+ versions.sort(key=LooseVersion)
+ return versions
+
+ def get_default_version(self) -> str:
+ """Get the newest release version of an Arch Linux package
+
+ Returns:
+ A string representing a version
+
+ Example::
+
+ "0.1.2"
+ """
+ return self.get_versions()[-1]
+
+ def get_package_info(self, version: str) -> Iterator[Tuple[str, ArchPackageInfo]]:
+ """Get release name and package information from version
+
+ Args:
+ version: arch version (e.g: "0.1.0")
+
+ Returns:
+ Iterator of tuple (release_name, p_info)
+ """
+ artifact = self.artifacts[version]
+ assert version == artifact["version"]
+
+ p_info = ArchPackageInfo(
+ name=artifact["name"],
+ filename=artifact["filename"],
+ url=artifact["url"],
+ version=version,
+ last_modified=artifact["last_modified"],
+ )
+ yield release_name(version, artifact["filename"]), p_info
+
+ def build_release(
+ self, p_info: ArchPackageInfo, uncompressed_path: str, directory: Sha1Git
+ ) -> Optional[Release]:
+ intrinsic_metadata = extract_intrinsic_metadata(Path(uncompressed_path))
+ author = Person.from_fullname(intrinsic_metadata["packager"].encode())
+ description = intrinsic_metadata["pkgdesc"]
+
+ message = (
+ f"Synthetic release for Arch Linux source package {p_info.name} "
+ f"version {p_info.version}\n\n"
+ f"{description}\n"
+ )
+ return Release(
+ name=p_info.version.encode(),
+ author=author,
+ date=TimestampWithTimezone.from_iso8601(p_info.last_modified),
+ message=message.encode(),
+ target_type=ObjectType.DIRECTORY,
+ target=directory,
+ synthetic=True,
+ )
diff --git a/swh/loader/package/arch/tasks.py b/swh/loader/package/arch/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/arch/tasks.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.arch.loader import ArchLoader
+
+
+@shared_task(name=__name__ + ".LoadArch")
+def load_arch(*, url=None, artifacts: list):
+ """Load Arch Linux packages"""
+ return ArchLoader.from_configfile(url=url, artifacts=artifacts).load()
diff --git a/swh/loader/package/arch/tests/__init__.py b/swh/loader/package/arch/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/package/arch/tests/data/fake_arch.sh b/swh/loader/package/arch/tests/data/fake_arch.sh
new file mode 100755
--- /dev/null
+++ b/swh/loader/package/arch/tests/data/fake_arch.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+ # Script to generate fake Arch Linux package files and fake HTTP responses.
+
+set -euo pipefail
+
+ # Please note that you need to install the Zstandard compression tool (zstd) to
+ # create .zst archives and XZ Utils (xz) to create .xz archives.
+command -v zstd || echo "you should install 'zstd' to run this script"
+command -v xz || echo "you should install 'xz' to run this script"
+
+# files and directories
+mkdir https_archive.archlinux.org
+mkdir https_uk.mirror.archlinuxarm.org
+mkdir -p tmp_dir/arch/
+
+cd tmp_dir/arch/
+
+mkdir 'packages_d_dialog_dialog-1:1.3_20190211-1-x86_64'
+mkdir 'packages_d_dialog_dialog-1:1.3_20220414-1-x86_64'
+
+echo -e '''pkgname = dialog
+pkgbase = dialog
+pkgver = 1:1.3_20190211-1
+pkgdesc = A tool to display dialog boxes from shell scripts
+url = https://invisible-island.net/dialog/
+builddate = 1550046926
+packager = Evangelos Foutras <evangelos@foutrelis.com>
+size = 455680
+arch = x86_64
+license = LGPL2.1
+provides = libdialog.so=15-64
+depend = sh
+''' > packages_d_dialog_dialog-1:1.3_20190211-1-x86_64/.PKGINFO
+
+echo -e '''pkgname = dialog
+pkgbase = dialog
+pkgver = 1:1.3_20220414-1
+pkgdesc = A tool to display dialog boxes from shell scripts
+url = https://invisible-island.net/dialog/
+builddate = 1650081535
+packager = Evangelos Foutras <foutrelis@archlinux.org>
+size = 483988
+arch = x86_64
+license = LGPL2.1
+provides = libdialog.so=15-64
+depend = sh
+depend = ncurses
+''' > packages_d_dialog_dialog-1:1.3_20220414-1-x86_64/.PKGINFO
+
+ # Compress package folders to .pkg.tar.xz and .pkg.tar.zst archives
+
+tar --force-local -acf 'packages_d_dialog_dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz' -C 'packages_d_dialog_dialog-1:1.3_20190211-1-x86_64' .
+tar --force-local -acf 'packages_d_dialog_dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst' -C 'packages_d_dialog_dialog-1:1.3_20220414-1-x86_64' .
+
+mv *.xz ../../https_archive.archlinux.org
+mv *.zst ../../https_archive.archlinux.org
+
+# uk.mirror.archlinuxarm.org
+mkdir 'aarch64_core_gzip-1.12-1-aarch64'
+
+echo -e '''# Generated by makepkg 6.0.1
+# using fakeroot version 1.28
+pkgname = gzip
+pkgbase = gzip
+pkgver = 1.12-1
+pkgdesc = GNU compression utility
+url = https://www.gnu.org/software/gzip/
+builddate = 1649365694
+packager = Arch Linux ARM Build System <builder+seattle@archlinuxarm.org>
+size = 162688
+arch = aarch64
+license = GPL3
+group = base-devel
+depend = glibc
+depend = bash
+depend = less
+''' > aarch64_core_gzip-1.12-1-aarch64/.PKGINFO
+
+tar --force-local -acf 'aarch64_core_gzip-1.12-1-aarch64.pkg.tar.xz' -C 'aarch64_core_gzip-1.12-1-aarch64' .
+
+mv *.xz ../../https_uk.mirror.archlinuxarm.org
+
+ # Clean up by removing tmp_dir
+cd ../../
+rm -r tmp_dir/
diff --git a/swh/loader/package/arch/tests/data/https_archive.archlinux.org/packages_d_dialog_dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz b/swh/loader/package/arch/tests/data/https_archive.archlinux.org/packages_d_dialog_dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/arch/tests/data/https_archive.archlinux.org/packages_d_dialog_dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst b/swh/loader/package/arch/tests/data/https_archive.archlinux.org/packages_d_dialog_dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/arch/tests/data/https_uk.mirror.archlinuxarm.org/aarch64_core_gzip-1.12-1-aarch64.pkg.tar.xz b/swh/loader/package/arch/tests/data/https_uk.mirror.archlinuxarm.org/aarch64_core_gzip-1.12-1-aarch64.pkg.tar.xz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/arch/tests/test_arch.py b/swh/loader/package/arch/tests/test_arch.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/arch/tests/test_arch.py
@@ -0,0 +1,231 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+import pytest
+
+from swh.loader.package.arch.loader import ArchLoader
+from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
+from swh.model.hashutil import hash_to_bytes
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ TimestampWithTimezone,
+)
+
+EXPECTED_PACKAGES = [
+ {
+ "url": "https://archive.archlinux.org/packages/d/dialog/",
+ "artifacts": [
+ {
+ "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", # noqa: B950
+ "arch": "x86_64",
+ "repo": "core",
+ "name": "dialog",
+ "version": "1:1.3_20190211-1",
+ "length": 180000,
+ "filename": "dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz",
+ "last_modified": "2019-02-13T08:36:00",
+ },
+ {
+ "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", # noqa: B950
+ "arch": "x86_64",
+ "repo": "core",
+ "name": "dialog",
+ "version": "1:1.3_20220414-1",
+ "length": 198000,
+ "filename": "dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst",
+ "last_modified": "2022-04-16T03:59:00",
+ },
+ ],
+ },
+ {
+ "url": "https://archlinuxarm.org/packages/aarch64/gzip",
+ "artifacts": [
+ {
+ "url": "https://uk.mirror.archlinuxarm.org/aarch64/core/gzip-1.12-1-aarch64.pkg.tar.xz", # noqa: B950
+ "arch": "aarch64",
+ "name": "gzip",
+ "repo": "core",
+ "length": 79640,
+ "version": "1.12-1",
+ "filename": "gzip-1.12-1-aarch64.pkg.tar.xz",
+ "last_modified": "2022-04-07T21:08:14",
+ }
+ ],
+ },
+]
+
+
+def test_get_versions(swh_storage):
+ loader = ArchLoader(
+ swh_storage,
+ url=EXPECTED_PACKAGES[0]["url"],
+ artifacts=EXPECTED_PACKAGES[0]["artifacts"],
+ )
+
+ assert loader.get_versions() == [
+ "1:1.3_20190211-1",
+ "1:1.3_20220414-1",
+ ]
+
+
+def test_get_default_version(requests_mock_datadir, swh_storage):
+ loader = ArchLoader(
+ swh_storage,
+ url=EXPECTED_PACKAGES[0]["url"],
+ artifacts=EXPECTED_PACKAGES[0]["artifacts"],
+ )
+ assert loader.get_default_version() == "1:1.3_20220414-1"
+
+
+def test_arch_loader_load_one_version(datadir, requests_mock_datadir, swh_storage):
+ loader = ArchLoader(
+ swh_storage,
+ url=EXPECTED_PACKAGES[1]["url"],
+ artifacts=EXPECTED_PACKAGES[1]["artifacts"],
+ )
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+
+ expected_snapshot_id = "4020d0a278027550e336b5481a4159a913c91aa4"
+ expected_release_id = "7681098c9e381f9cc8bd1724d57eeee2182982dc"
+
+ assert expected_snapshot_id == actual_load_status["snapshot_id"]
+
+ expected_snapshot = Snapshot(
+ id=hash_to_bytes(actual_load_status["snapshot_id"]),
+ branches={
+ b"releases/1.12-1/gzip-1.12-1-aarch64.pkg.tar.xz": SnapshotBranch(
+ target=hash_to_bytes(expected_release_id),
+ target_type=TargetType.RELEASE,
+ ),
+ b"HEAD": SnapshotBranch(
+ target=b"releases/1.12-1/gzip-1.12-1-aarch64.pkg.tar.xz",
+ target_type=TargetType.ALIAS,
+ ),
+ },
+ )
+ check_snapshot(expected_snapshot, swh_storage)
+
+ stats = get_stats(swh_storage)
+ assert {
+ "content": 1,
+ "directory": 1,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 1,
+ "revision": 0,
+ "skipped_content": 0,
+ "snapshot": 1,
+ } == stats
+
+ assert swh_storage.release_get([hash_to_bytes(expected_release_id)])[0] == Release(
+ name=b"1.12-1",
+ message=b"Synthetic release for Arch Linux source package gzip version "
+ b"1.12-1\n\nGNU compression utility\n",
+ target=hash_to_bytes("bd742aaf422953a1f7a5e084ec4a7477491d63fb"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person.from_fullname(
+ b"Arch Linux ARM Build System <builder+seattle@archlinuxarm.org>"
+ ),
+ date=TimestampWithTimezone.from_iso8601("2022-04-07T21:08:14+00:00"),
+ id=hash_to_bytes(expected_release_id),
+ )
+
+ assert_last_visit_matches(
+ swh_storage,
+ url=EXPECTED_PACKAGES[1]["url"],
+ status="full",
+ type="arch",
+ snapshot=expected_snapshot.id,
+ )
+
+
+def test_arch_loader_load_n_versions(datadir, requests_mock_datadir, swh_storage):
+
+ loader = ArchLoader(
+ swh_storage,
+ url=EXPECTED_PACKAGES[0]["url"],
+ artifacts=EXPECTED_PACKAGES[0]["artifacts"],
+ )
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+
+ expected_snapshot_id = "832139d69a91edffcc3a96cca11deaf9255041c3"
+
+ assert expected_snapshot_id == actual_load_status["snapshot_id"]
+
+ expected_snapshot = Snapshot(
+ id=hash_to_bytes(actual_load_status["snapshot_id"]),
+ branches={
+ b"releases/1:1.3_20190211-1/"
+ b"dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz": SnapshotBranch(
+ target=hash_to_bytes("37efb727ff8bb8fbf92518aa8fe5fff2ad427d06"),
+ target_type=TargetType.RELEASE,
+ ),
+ b"releases/1:1.3_20220414-1/"
+ b"dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst": SnapshotBranch(
+ target=hash_to_bytes("020d3f5627df7474f257fd04f1ede4415296e265"),
+ target_type=TargetType.RELEASE,
+ ),
+ b"HEAD": SnapshotBranch(
+ target=b"releases/1:1.3_20220414-1/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst",
+ target_type=TargetType.ALIAS,
+ ),
+ },
+ )
+
+ check_snapshot(expected_snapshot, swh_storage)
+
+ stats = get_stats(swh_storage)
+ assert {
+ "content": 2,
+ "directory": 2,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 2,
+ "revision": 0,
+ "skipped_content": 0,
+ "snapshot": 1,
+ } == stats
+
+ assert_last_visit_matches(
+ swh_storage,
+ url=EXPECTED_PACKAGES[0]["url"],
+ status="full",
+ type="arch",
+ snapshot=expected_snapshot.id,
+ )
+
+
+def test_arch_invalid_origin_archive_not_found(swh_storage, requests_mock_datadir):
+ url = "https://nowhere/packages/42"
+ loader = ArchLoader(
+ swh_storage,
+ url,
+ artifacts=[
+ {
+ "filename": "42-0.0.1.pkg.xz",
+ "url": "https://mirror2.nowhere/pkg/42-0.0.1.pkg.xz",
+ "version": "0.0.1",
+ "arch": "aarch64",
+ "name": "42",
+ "repo": "community",
+ "length": 42,
+ "last_modified": "2022-04-07T21:08:14",
+ },
+ ],
+ )
+ with pytest.raises(Exception):
+ assert loader.load() == {"status": "failed"}
+ assert_last_visit_matches(
+ swh_storage, url, status="not_found", type="arch", snapshot=None
+ )
diff --git a/swh/loader/package/arch/tests/test_tasks.py b/swh/loader/package/arch/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/arch/tests/test_tasks.py
@@ -0,0 +1,35 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_tasks_arch_loader(
+ mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+):
+ mock_load = mocker.patch("swh.loader.package.arch.loader.ArchLoader.load")
+ mock_load.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.package.arch.tasks.LoadArch",
+ kwargs=dict(
+ url="some-url/packages/s/some-package",
+ artifacts=[
+ {
+ "version": "0.0.1",
+ "url": "https://somewhere/some-package-0.0.1.pkg.xz",
+ "filename": "some-package-0.0.1.pkg.xz",
+ "arch": "aarch64",
+ "name": "some-package",
+ "repo": "community",
+ "length": 42,
+ "last_modified": "1970-01-01T21:08:14",
+ }
+ ],
+ ),
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+ assert mock_load.called
+ assert res.result == {"status": "eventful"}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 1:40 PM (16 h, 54 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220090
Attached To
D7995: Arch Linux loader
Event Timeline
Log In to Comment