Page MenuHomeSoftware Heritage

D8575.diff
No OneTemporary

D8575.diff

diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -149,6 +149,15 @@
- from extrinsic metadata
- from extrinsic metadata
- name, version and description from intrinsic metadata
+ * - puppet
+ - ``p_info.​version``
+ - ``release_name(​version)``
+ - =version
+ - Synthetic release for Puppet source package {p_info.name} version {version} {description}
+ - true
+ - from intrinsic metadata
+ - from extrinsic metadata
+ - version and description from intrinsic metadata
* - pypi
- ``metadata​["version"]``
- ``release_name(​version)`` or ``release_name(​version, filename)``
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -68,6 +68,7 @@
loader.npm=swh.loader.package.npm:register
loader.opam=swh.loader.package.opam:register
loader.pubdev=swh.loader.package.pubdev:register
+ loader.puppet=swh.loader.package.puppet:register
loader.pypi=swh.loader.package.pypi:register
loader.maven=swh.loader.package.maven:register
""",
diff --git a/swh/loader/package/puppet/__init__.py b/swh/loader/package/puppet/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/puppet/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+    """Describe this loader package to whoever resolves the entry point.
+
+    Returns:
+        A mapping giving the dotted path of the tasks module under
+        ``task_modules`` and the loader class under ``loader``.
+    """
+    # Kept as a function-local import so the loader module is only imported
+    # when registration is actually requested.
+    from .loader import PuppetLoader
+
+    tasks_module = f"{__name__}.tasks"
+    definition: Mapping[str, Any] = {
+        "task_modules": [tasks_module],
+        "loader": PuppetLoader,
+    }
+    return definition
diff --git a/swh/loader/package/puppet/loader.py b/swh/loader/package/puppet/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/puppet/loader.py
@@ -0,0 +1,152 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from datetime import datetime
+import json
+from pathlib import Path
+from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
+
+import attr
+import iso8601
+from packaging.version import parse as parse_version
+
+from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.utils import Person, release_name
+from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
+from swh.storage.interface import StorageInterface
+
+
+@attr.s
+class PuppetPackageInfo(BasePackageInfo):
+ """Information about one archive of a Puppet module version.
+
+ Adds the fields below to ``BasePackageInfo``.
+ NOTE(review): ``PuppetLoader.get_package_info`` also passes ``url=``, which
+ is not declared here; presumably it is inherited from ``BasePackageInfo``.
+ """
+
+ name = attr.ib(type=str)
+ """Name of the package"""
+
+ filename = attr.ib(type=str)
+ """Archive (tar.gz) file name"""
+
+ version = attr.ib(type=str)
+ """Current version"""
+
+ last_modified = attr.ib(type=datetime)
+ """Module last update date as release date"""
+
+
+def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
+    """Extract intrinsic metadata from the metadata.json file at dir_path.
+
+    Each Puppet module version has a metadata.json file at the root of the archive.
+
+    See ``https://puppet.com/docs/puppet/7/modules_metadata.html`` for metadata specifications.
+
+    Args:
+        dir_path: A directory on disk where a metadata.json file must be present
+
+    Returns:
+        The parsed content of metadata.json
+
+    Raises:
+        OSError: if metadata.json cannot be read (e.g. ``FileNotFoundError``)
+        json.JSONDecodeError: if the file does not hold valid JSON
+    """
+    meta_json_path = dir_path / "metadata.json"
+    # Hand raw bytes to the JSON parser instead of decoding with the locale's
+    # preferred encoding: ``json.loads`` detects UTF-8/16/32 by itself and
+    # tolerates a leading UTF-8 byte order mark, so the result no longer
+    # depends on the environment the loader runs in.
+    metadata: Dict[str, Any] = json.loads(meta_json_path.read_bytes())
+    return metadata
+
+
+class PuppetLoader(PackageLoader[PuppetPackageInfo]):
+    """Package loader for a single Puppet module origin.
+
+    The versions to load are not discovered by the loader itself: they are
+    handed over through ``artifacts``, keyed by version string.
+    """
+
+    visit_type = "puppet"
+
+    def __init__(
+        self,
+        storage: StorageInterface,
+        url: str,
+        artifacts: Dict[str, Any],
+        **kwargs,
+    ):
+        """Build the loader.
+
+        Args:
+            storage: Storage forwarded to ``PackageLoader``
+            url: Origin URL of the module
+            artifacts: Mapping from version string to a dict holding at least
+              the ``url``, ``filename`` and ``last_update`` keys of that
+              version's archive
+            **kwargs: Extra arguments forwarded to ``PackageLoader``
+        """
+        super().__init__(storage=storage, url=url, **kwargs)
+        self.url = url
+        self.artifacts = artifacts
+
+    def get_versions(self) -> Sequence[str]:
+        """Get all released versions of a Puppet module
+
+        Versions are ordered with ``packaging.version.parse``, oldest first,
+        so that e.g. "0.10.2" sorts after "0.1.1".
+
+        Returns:
+            A sequence of versions
+
+        Example::
+
+            ["0.1.1", "0.10.2"]
+        """
+        return sorted(self.artifacts, key=parse_version)
+
+    def get_default_version(self) -> str:
+        """Get the newest release version of a Puppet module
+
+        Returns:
+            A string representing a version
+
+        Example::
+
+            "0.10.2"
+        """
+        return self.get_versions()[-1]
+
+    def get_package_info(self, version: str) -> Iterator[Tuple[str, PuppetPackageInfo]]:
+        """Get release name and package information from version
+
+        Args:
+            version: Package version (e.g: "0.1.0")
+
+        Returns:
+            Iterator of tuple (release_name, p_info)
+
+        Raises:
+            KeyError: if ``version`` or one of the expected artifact keys is
+              missing
+            ValueError: if the artifact file name does not end with
+              ``-{version}.tar.gz``
+        """
+        data = self.artifacts[version]
+        filename: str = data["filename"]
+        suffix = f"-{version}.tar.gz"
+        # Explicit check rather than ``assert``: assertions are removed under
+        # ``python -O`` and a non-matching file name would then silently yield
+        # a wrong package name.
+        if not filename.endswith(suffix):
+            raise ValueError(
+                f"Unexpected filename {filename!r} for version {version!r}: "
+                f"it should end with {suffix!r}"
+            )
+        # The package name is whatever precedes the "-{version}.tar.gz" suffix.
+        pkgname: str = filename[: -len(suffix)]
+        url: str = data["url"]
+        last_modified: datetime = iso8601.parse_date(data["last_update"])
+
+        p_info = PuppetPackageInfo(
+            name=pkgname,
+            filename=filename,
+            url=url,
+            version=version,
+            last_modified=last_modified,
+        )
+        yield release_name(version), p_info
+
+    def build_release(
+        self, p_info: PuppetPackageInfo, uncompressed_path: str, directory: Sha1Git
+    ) -> Optional[Release]:
+        """Build the synthetic release for one extracted module archive.
+
+        Args:
+            p_info: Package information of the version being loaded
+            uncompressed_path: Directory the archive was extracted into
+            directory: Identifier of the directory the release targets
+
+        Returns:
+            A release named after the version, authored by the ``author`` of
+            metadata.json and dated with ``p_info.last_modified``
+
+        Raises:
+            ValueError: if the version declared in metadata.json differs from
+              ``p_info.version``
+            KeyError: if metadata.json lacks ``version``, ``summary`` or
+              ``author``
+        """
+        # compute extracted module directory name
+        dirname = p_info.filename.split(".tar.gz")[0]
+
+        # Extract intrinsic metadata from uncompressed_path/{dirname}/metadata.json
+        intrinsic_metadata = extract_intrinsic_metadata(
+            Path(uncompressed_path) / dirname
+        )
+
+        version: str = intrinsic_metadata["version"]
+        # Explicit check rather than ``assert`` so that a mismatch between the
+        # archive content and the announced version is still detected under
+        # ``python -O``.
+        if version != p_info.version:
+            raise ValueError(
+                f"Version mismatch for {p_info.filename}: metadata.json declares "
+                f"{version!r} but {p_info.version!r} was expected"
+            )
+
+        description = intrinsic_metadata["summary"]
+        author = Person.from_fullname(intrinsic_metadata["author"].encode())
+
+        message = (
+            f"Synthetic release for Puppet source package {p_info.name} "
+            f"version {version}\n\n"
+            f"{description}\n"
+        )
+
+        return Release(
+            name=version.encode(),
+            author=author,
+            date=TimestampWithTimezone.from_datetime(p_info.last_modified),
+            message=message.encode(),
+            target_type=ObjectType.DIRECTORY,
+            target=directory,
+            synthetic=True,
+        )
diff --git a/swh/loader/package/puppet/tasks.py b/swh/loader/package/puppet/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/puppet/tasks.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.puppet.loader import PuppetLoader
+
+
+@shared_task(name=__name__ + ".LoadPuppet")
+def load_puppet(**kwargs):
+    """Celery task: build a ``PuppetLoader`` from the config file and run it.
+
+    All keyword arguments are forwarded to ``PuppetLoader.from_configfile``;
+    the value returned by the loader's ``load()`` is the task result.
+    """
+    puppet_loader = PuppetLoader.from_configfile(**kwargs)
+    return puppet_loader.load()
diff --git a/swh/loader/package/puppet/tests/__init__.py b/swh/loader/package/puppet/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/package/puppet/tests/data/fake_puppet.sh b/swh/loader/package/puppet/tests/data/fake_puppet.sh
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/puppet/tests/data/fake_puppet.sh
@@ -0,0 +1,136 @@
+#!/usr/bin/env bash
+
+# Script to generate fake Puppet module archives as .tar.gz.
+#
+# It builds two minimal module trees (saz-memcached 1.0.0 and 8.1.0), each
+# holding only a metadata.json file, archives them and moves the archives into
+# $BASE_PATH. All paths are relative to the directory the script is run from.
+
+# Abort on the first failing command, on use of an unset variable and when
+# any command of a pipeline fails.
+set -euo pipefail
+
+# Create directories
+# TMP is a scratch tree removed at the end of the script; BASE_PATH is where
+# the generated archives end up.
+readonly TMP=tmp_dir/puppet
+readonly BASE_PATH=https_forgeapi.puppet.com
+
+mkdir -p $TMP
+
+# tar.gz package archives
+# Puppet module tar.gz archive needs at least one directory with a metadata.json file
+mkdir -p ${TMP}/saz-memcached-1.0.0
+mkdir -p ${TMP}/saz-memcached-8.1.0
+mkdir -p $BASE_PATH
+
+# metadata.json of version 1.0.0. The payload below is written verbatim
+# (plus the newline echo appends); do not reformat it, the archive content is
+# what the loader tests hash.
+echo -e '''{
+ "summary": "UNKNOWN",
+ "author": "saz",
+ "source": "UNKNOWN",
+ "dependencies": [
+
+ ],
+ "types": [
+
+ ],
+ "license": "Apache License, Version 2.0",
+ "project_page": "https://github.com/saz/puppet-memcached",
+ "version": "1.0.0",
+ "name": "saz-memcached",
+ "checksums": {
+ "spec/spec_helper.rb": "ca19ec4f451ebc7fdb035b52eae6e909",
+ "manifests/params.pp": "0b8904086e7fa6f0d1f667d547a17d96",
+ "README.md": "fa0b9f6d97f2763e565d8a330fb3930b",
+ "manifests/config.pp": "706f7c5001fb6014575909a335a52def",
+ "templates/memcached.conf.erb": "8151e00d922bb9ebb1a24a05ac0969d7",
+ "manifests/service.pp": "a528751401189c299a38cab12d52431f",
+ "tests/init.pp": "e798f4999ba392f3c0fce0d5290c263f",
+ "manifests/install.pp": "11a9e9a99a7bc1c7b2511ce7e79c9fb4",
+ "spec/spec.opts": "a600ded995d948e393fbe2320ba8e51c",
+ "metadata.json": "d34d0b70aba36510fbc2df4e667479ef",
+ "manifests/init.pp": "c5166a8a88b544ded705efac21494bc1",
+ "Modulefile": "7f512991a7d2ad99ffb28ac6e7419f9e"
+ },
+ "description": "Manage memcached via Puppet"
+}
+''' > ${TMP}/saz-memcached-1.0.0/metadata.json
+
+# metadata.json of version 8.1.0; same remark as above about not reformatting
+# the payload.
+echo -e '''{
+ "name": "saz-memcached",
+ "version": "8.1.0",
+ "author": "saz",
+ "summary": "Manage memcached via Puppet",
+ "license": "Apache-2.0",
+ "source": "git://github.com/saz/puppet-memcached.git",
+ "project_page": "https://github.com/saz/puppet-memcached",
+ "issues_url": "https://github.com/saz/puppet-memcached/issues",
+ "description": "Manage memcached via Puppet",
+ "requirements": [
+ {"name":"puppet","version_requirement":">= 6.1.0 < 8.0.0" }
+ ],
+ "dependencies": [
+ {"name":"puppetlabs/stdlib","version_requirement":">= 4.13.1 < 9.0.0"},
+ {"name":"puppetlabs/firewall","version_requirement":">= 0.1.0 < 4.0.0"},
+ {"name":"puppet/systemd","version_requirement":">= 2.10.0 < 4.0.0"},
+ {"name":"puppet/selinux","version_requirement":">= 3.2.0 < 4.0.0"}
+ ],
+ "operatingsystem_support": [
+ {
+ "operatingsystem": "RedHat",
+ "operatingsystemrelease": [
+ "7",
+ "8",
+ "9"
+ ]
+ },
+ {
+ "operatingsystem": "CentOS",
+ "operatingsystemrelease": [
+ "7",
+ "8",
+ "9"
+ ]
+ },
+ {
+ "operatingsystem": "OracleLinux",
+ "operatingsystemrelease": [
+ "7"
+ ]
+ },
+ {
+ "operatingsystem": "Scientific",
+ "operatingsystemrelease": [
+ "7"
+ ]
+ },
+ {
+ "operatingsystem": "Debian",
+ "operatingsystemrelease": [
+ "9",
+ "10",
+ "11"
+ ]
+ },
+ {
+ "operatingsystem": "Ubuntu",
+ "operatingsystemrelease": [
+ "18.04",
+ "20.04",
+ "22.04"
+ ]
+ },
+ {
+ "operatingsystem": "Windows"
+ },
+ {
+ "operatingsystem": "FreeBSD"
+ }
+ ]
+}
+''' > ${TMP}/saz-memcached-8.1.0/metadata.json
+
+cd $TMP
+
+# Tar compress
+# Each archive holds a single top-level directory named after the module and
+# version. The "v3_files_" prefix mirrors the /v3/files/ path of the archive
+# URLs used in test_puppet.py (presumably the naming scheme expected by the
+# requests_mock_datadir fixture -- TODO confirm).
+tar -czf v3_files_saz-memcached-1.0.0.tar.gz saz-memcached-1.0.0
+tar -czf v3_files_saz-memcached-8.1.0.tar.gz saz-memcached-8.1.0
+
+# Move .tar.gz archives to a servable directory
+# (two levels up from $TMP, i.e. back in the directory the script started in)
+mv *.tar.gz ../../$BASE_PATH
+
+# Clean up removing tmp_dir
+cd ../../
+rm -r tmp_dir/
diff --git a/swh/loader/package/puppet/tests/data/https_forgeapi.puppet.com/v3_files_saz-memcached-1.0.0.tar.gz b/swh/loader/package/puppet/tests/data/https_forgeapi.puppet.com/v3_files_saz-memcached-1.0.0.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/puppet/tests/data/https_forgeapi.puppet.com/v3_files_saz-memcached-8.1.0.tar.gz b/swh/loader/package/puppet/tests/data/https_forgeapi.puppet.com/v3_files_saz-memcached-8.1.0.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/puppet/tests/test_puppet.py b/swh/loader/package/puppet/tests/test_puppet.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/puppet/tests/test_puppet.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.loader.package.puppet.loader import PuppetLoader
+from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
+from swh.model.hashutil import hash_to_bytes
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ TimestampWithTimezone,
+)
+
+# Loader arguments shared by the tests below: the origin URL of the
+# saz-memcached module and, for each of its two versions, the archive URL,
+# the archive file name and the last update date.
+ORIGINS = {
+ "url": "https://forge.puppet.com/modules/saz/memcached",
+ "artifacts": {
+ "1.0.0": {
+ "url": "https://forgeapi.puppet.com/v3/files/saz-memcached-1.0.0.tar.gz", # noqa: B950
+ "version": "1.0.0",
+ "filename": "saz-memcached-1.0.0.tar.gz",
+ "last_update": "2011-11-20T13:40:30-08:00",
+ },
+ "8.1.0": {
+ "url": "https://forgeapi.puppet.com/v3/files/saz-memcached-8.1.0.tar.gz", # noqa: B950
+ "version": "8.1.0",
+ "filename": "saz-memcached-8.1.0.tar.gz",
+ "last_update": "2022-07-11T03:34:55-07:00",
+ },
+ },
+}
+
+
+def test_get_versions(requests_mock_datadir, swh_storage):
+    """The loader lists every version found in ``artifacts``, oldest first."""
+    # ORIGINS holds exactly the ``url`` and ``artifacts`` keyword arguments.
+    puppet_loader = PuppetLoader(swh_storage, **ORIGINS)
+    versions = puppet_loader.get_versions()
+    assert versions == ["1.0.0", "8.1.0"]
+
+
+def test_get_default_version(requests_mock_datadir, swh_storage):
+    """The default version is the newest one found in ``artifacts``."""
+    # ORIGINS holds exactly the ``url`` and ``artifacts`` keyword arguments.
+    puppet_loader = PuppetLoader(swh_storage, **ORIGINS)
+    default_version = puppet_loader.get_default_version()
+    assert default_version == "8.1.0"
+
+
+def test_puppet_loader_load_multiple_version(
+    datadir, requests_mock_datadir, swh_storage
+):
+    """Load both saz-memcached versions and check every object that got stored."""
+    # ORIGINS holds exactly the ``url`` and ``artifacts`` keyword arguments.
+    puppet_loader = PuppetLoader(swh_storage, **ORIGINS)
+    result = puppet_loader.load()
+
+    assert result["status"] == "eventful"
+    assert result["snapshot_id"] is not None
+    # Pinned identifier of the snapshot produced from the fixture archives.
+    assert "9a8e76a8a6eae5285059d9f6d5083a99317727cf" == result["snapshot_id"]
+
+    release_1_0_0_id = hash_to_bytes("50eb560bb5322cd149359b9cc8debc78834bcfad")
+    release_8_1_0_id = hash_to_bytes("2f5722136d775dd48fe85fabdd274f1e2d7fcf22")
+
+    head_branch = SnapshotBranch(
+        target=b"releases/8.1.0",
+        target_type=TargetType.ALIAS,
+    )
+    expected_snapshot = Snapshot(
+        id=hash_to_bytes(result["snapshot_id"]),
+        branches={
+            b"HEAD": head_branch,
+            b"releases/1.0.0": SnapshotBranch(
+                target=release_1_0_0_id,
+                target_type=TargetType.RELEASE,
+            ),
+            b"releases/8.1.0": SnapshotBranch(
+                target=release_8_1_0_id,
+                target_type=TargetType.RELEASE,
+            ),
+        },
+    )
+    check_snapshot(expected_snapshot, swh_storage)
+
+    # Object counts after loading the two versions.
+    expected_stats = {
+        "content": 2,
+        "directory": 4,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 2,
+        "revision": 0,
+        "skipped_content": 0,
+        "snapshot": 1,
+    }
+    assert expected_stats == get_stats(swh_storage)
+
+    expected_release = Release(
+        name=b"8.1.0",
+        message=b"Synthetic release for Puppet source package saz-memcached version 8.1.0\n\n"
+        b"Manage memcached via Puppet\n",
+        target=hash_to_bytes("1b9a2dbc80f954e1ba4b2f1c6344d1ce4e84ab7c"),
+        target_type=ObjectType.DIRECTORY,
+        synthetic=True,
+        author=Person(fullname=b"saz", name=b"saz", email=None),
+        date=TimestampWithTimezone.from_iso8601("2022-07-11T03:34:55-07:00"),
+        id=release_8_1_0_id,
+    )
+    stored_releases = swh_storage.release_get([release_8_1_0_id])
+    assert stored_releases[0] == expected_release
+
+    assert_last_visit_matches(
+        swh_storage,
+        url=ORIGINS["url"],
+        status="full",
+        type="puppet",
+        snapshot=expected_snapshot.id,
+    )
diff --git a/swh/loader/package/puppet/tests/test_tasks.py b/swh/loader/package/puppet/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/puppet/tests/test_tasks.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_tasks_puppet_loader(
+    mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+):
+    """Sending the LoadPuppet task runs ``PuppetLoader.load`` and returns its result."""
+    expected_result = {"status": "eventful"}
+    # ``load`` is patched, so the artifact below is never actually fetched.
+    patched_load = mocker.patch("swh.loader.package.puppet.loader.PuppetLoader.load")
+    patched_load.return_value = expected_result
+
+    task_kwargs = {
+        "url": "some-url/api/packages/some-package",
+        "artifacts": {
+            "1.0.0": {
+                "url": "https://domain/some-package-1.0.0.tar.gz",
+                "version": "1.0.0",
+                "filename": "some-module-1.0.0.tar.gz",
+                "last_update": "2011-11-20T13:40:30-08:00",
+            },
+        },
+    }
+    async_result = swh_scheduler_celery_app.send_task(
+        "swh.loader.package.puppet.tasks.LoadPuppet",
+        kwargs=task_kwargs,
+    )
+    assert async_result
+    async_result.wait()
+    assert async_result.successful()
+    assert patched_load.called
+    assert async_result.result == {"status": "eventful"}

File Metadata

Mime Type
text/plain
Expires
Dec 19 2024, 3:34 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226200

Event Timeline