Page MenuHomeSoftware Heritage

D8569.diff
No OneTemporary

D8569.diff

diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -176,6 +176,15 @@
- from int metadata or ""
- from ext metadata or None
- metadata is intrinsic
+ * - rubygems
+ - ``p_info.version``
+ - ``release_name(​version)``
+ - =version
+ - Synthetic release for RubyGems source package {p_info.name} version {p_info.version}
+ - true
+ - from ext metadata
+ - from ext metadata
+ - The source code is extracted from a tarball nested within the gem file
using this function::
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@
loader.puppet=swh.loader.package.puppet:register
loader.pypi=swh.loader.package.pypi:register
loader.maven=swh.loader.package.maven:register
+ loader.rubygems=swh.loader.package.rubygems:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/loader/package/rubygems/__init__.py b/swh/loader/package/rubygems/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+ """Register the current worker module's definition"""
+ from .loader import RubyGemsLoader
+
+ return {
+ "task_modules": [f"{__name__}.tasks"],
+ "loader": RubyGemsLoader,
+ }
diff --git a/swh/loader/package/rubygems/loader.py b/swh/loader/package/rubygems/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/loader.py
@@ -0,0 +1,135 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import logging
+import os
+from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple
+
+import attr
+
+from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.utils import cached_method, get_url_body, release_name
+from swh.model import from_disk
+from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
+from swh.storage.interface import StorageInterface
+
+logger = logging.getLogger(__name__)
+
+
+@attr.s
+class RubyGemsPackageInfo(BasePackageInfo):
+ name = attr.ib(type=str)
+ """Name of the package"""
+
+ version = attr.ib(type=str)
+ """Current version"""
+
+ built_at = attr.ib(type=Optional[TimestampWithTimezone])
+ """Version build date"""
+
+ authors = attr.ib(type=List[Person])
+ """Authors"""
+
+
+class RubyGemsLoader(PackageLoader[RubyGemsPackageInfo]):
+ """Load ``.gem`` files from ``RubyGems.org`` into the SWH archive."""
+
+ visit_type = "rubygems"
+
+ def __init__(
+ self,
+ storage: StorageInterface,
+ url: str,
+ max_content_size: Optional[int] = None,
+ **kwargs,
+ ):
+ super().__init__(storage, url, max_content_size=max_content_size, **kwargs)
+ # Lister URLs are in the ``https://rubygems.org/gems/{pkgname}`` format
+ assert url.startswith("https://rubygems.org/gems/"), (
+ "Expected rubygems.org url, got '%s'" % url
+ )
+ self.gem_name = url[len("https://rubygems.org/gems/") :]
+ # API docs at ``https://guides.rubygems.org/rubygems-org-api/``
+ self.api_base_url = "https://rubygems.org/api/v1"
+ # Mapping of version number to corresponding metadata from the API
+ self.versions_info: Dict[str, Dict[str, Any]] = {}
+
+ def get_versions(self) -> Sequence[str]:
+ """Return all versions for the gem being loaded.
+
+ Also stores the detailed information for each version since everything
+ is present in this API call."""
+ versions_info = get_url_body(
+ f"{self.api_base_url}/versions/{self.gem_name}.json"
+ )
+ versions = []
+
+ for version_info in json.loads(versions_info):
+ number = version_info["number"]
+ self.versions_info[number] = version_info
+ versions.append(number)
+
+ return versions
+
+ @cached_method
+ def get_default_version(self) -> str:
+ latest = get_url_body(
+ f"{self.api_base_url}/versions/{self.gem_name}/latest.json"
+ )
+ return json.loads(latest)["version"]
+
+ def _load_directory(
+ self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], tmpdir: str
+ ) -> Tuple[str, from_disk.Directory]:
+ """Override the directory loading to point it to the actual code.
+
+ Gem files are uncompressed tarballs containing:
+ - ``metadata.gz``: the metadata about this gem
+ - ``data.tar.gz``: the code and possible binary artifacts
+ - ``checksums.yaml.gz``: checksums
+ """
+ logger.debug("Unpacking gem file to point to the actual code")
+ uncompressed_path = self.uncompress(dl_artifacts, dest=tmpdir)
+ source_code_tarball = os.path.join(uncompressed_path, "data.tar.gz")
+
+ return super()._load_directory([(source_code_tarball, {})], tmpdir)
+
+ def get_package_info(
+ self, version: str
+ ) -> Iterator[Tuple[str, RubyGemsPackageInfo]]:
+
+ info = self.versions_info[version]
+
+ authors = info["authors"].split(", ")
+ p_info = RubyGemsPackageInfo(
+ url=f"https://rubygems.org/downloads/{self.gem_name}-{version}.gem",
+ # See format of gem files in ``_load_directory``
+ filename=f"{self.gem_name}-{version}.tar",
+ version=version,
+ built_at=TimestampWithTimezone.from_iso8601(info["built_at"]),
+ name=self.gem_name,
+ authors=[Person.from_fullname(person.encode()) for person in authors],
+ )
+ yield release_name(version), p_info
+
+ def build_release(
+ self, p_info: RubyGemsPackageInfo, uncompressed_path: str, directory: Sha1Git
+ ) -> Optional[Release]:
+ msg = (
+ f"Synthetic release for RubyGems source package {p_info.name} "
+ f"version {p_info.version}\n"
+ )
+
+ return Release(
+ name=p_info.version.encode(),
+ message=msg.encode(),
+ date=p_info.built_at,
+ # TODO multiple authors (T3887)
+ author=p_info.authors[0],
+ target_type=ObjectType.DIRECTORY,
+ target=directory,
+ synthetic=True,
+ )
diff --git a/swh/loader/package/rubygems/tasks.py b/swh/loader/package/rubygems/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tasks.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.rubygems.loader import RubyGemsLoader
+
+
+@shared_task(name=__name__ + ".LoadRubyGems")
+def load_rubygems(**kwargs):
+ """Load ruby gems"""
+ return RubyGemsLoader.from_configfile(**kwargs).load()
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json
@@ -0,0 +1,36 @@
+[
+ {
+ "authors": "Fabio Neves",
+ "built_at": "2014-09-11T00:00:00.000Z",
+ "created_at": "2014-09-25T09:02:44.313Z",
+ "description": "A simple wrapper around HG command line tool",
+ "downloads_count": 2770,
+ "metadata": {},
+ "number": "0.8.5",
+ "summary": "Mercurial command line ruby wrapper",
+ "platform": "ruby",
+ "rubygems_version": "\u003e= 0",
+ "ruby_version": "\u003e= 0",
+ "prerelease": false,
+ "licenses": [],
+ "requirements": [],
+ "sha": "cee62e168ffd7d36c565e00f29fa6a0b57ef15c4c14055345b1e01148ec4fab8"
+ },
+ {
+ "authors": "Fabio Neves",
+ "built_at": "2014-09-11T00:00:00.000Z",
+ "created_at": "2014-09-18T08:59:42.895Z",
+ "description": "A simple wrapper around HG command line tool",
+ "downloads_count": 2415,
+ "metadata": {},
+ "number": "0.8.4",
+ "summary": "Mercurial command line ruby wrapper",
+ "platform": "ruby",
+ "rubygems_version": "\u003e= 0",
+ "ruby_version": "\u003e= 0",
+ "prerelease": false,
+ "licenses": [],
+ "requirements": [],
+ "sha": "ec60f0568f4f8744a0da78089a05e51d1c0e9799a1abfb37f63cdf7ed019c862"
+ }
+]
\ No newline at end of file
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json
@@ -0,0 +1 @@
+{"version":"0.8.5"}
\ No newline at end of file
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.4.gem b/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.4.gem
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.5.gem b/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.5.gem
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/rubygems/tests/test_rubygems.py b/swh/loader/package/rubygems/tests/test_rubygems.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/test_rubygems.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.loader.package.rubygems.loader import RubyGemsLoader
+from swh.loader.tests import get_stats
+
+
+def test_rubygems_loader(swh_storage, requests_mock_datadir):
+ url = "https://rubygems.org/gems/mercurial-wrapper"
+ loader = RubyGemsLoader(swh_storage, url)
+
+ assert loader.load()["status"] == "eventful"
+
+ stats = get_stats(swh_storage)
+ assert {
+ "content": 8,
+ "directory": 4,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 2,
+ "revision": 0,
+ "skipped_content": 0,
+ "snapshot": 1,
+ } == stats
diff --git a/swh/loader/package/rubygems/tests/test_tasks.py b/swh/loader/package/rubygems/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/test_tasks.py
@@ -0,0 +1,21 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_tasks_rubygems_loader(
+ mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+):
+ mock_load = mocker.patch("swh.loader.package.rubygems.loader.RubyGemsLoader.load")
+ mock_load.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.package.rubygems.tasks.LoadRubyGems",
+ kwargs={"url": "https://rubygems.org/gems/whatever-package"},
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+ assert mock_load.called
+ assert res.result == {"status": "eventful"}

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 9:47 AM (18 h, 57 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219725

Event Timeline