Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123503
D8569.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
D8569.diff
View Options
diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -176,6 +176,15 @@
- from int metadata or ""
- from ext metadata or None
- metadata is intrinsic
+ * - rubygems
+ - ``p_info.version``
+ - ``release_name(version)``
+ - =version
+ - Synthetic release for RubyGems source package {p_info.name} version {p_info.version}
+ - true
+ - from ext metadata
+ - from ext metadata
+ - The source code is extracted from a tarball nested within the gem file
using this function::
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@
loader.puppet=swh.loader.package.puppet:register
loader.pypi=swh.loader.package.pypi:register
loader.maven=swh.loader.package.maven:register
+ loader.rubygems=swh.loader.package.rubygems:register
""",
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/loader/package/rubygems/__init__.py b/swh/loader/package/rubygems/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+ """Register the current worker module's definition"""
+ from .loader import RubyGemsLoader
+
+ return {
+ "task_modules": [f"{__name__}.tasks"],
+ "loader": RubyGemsLoader,
+ }
diff --git a/swh/loader/package/rubygems/loader.py b/swh/loader/package/rubygems/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/loader.py
@@ -0,0 +1,135 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import logging
+import os
+from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple
+
+import attr
+
+from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.utils import cached_method, get_url_body, release_name
+from swh.model import from_disk
+from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
+from swh.storage.interface import StorageInterface
+
+logger = logging.getLogger(__name__)
+
+
+@attr.s
+class RubyGemsPackageInfo(BasePackageInfo):
+ name = attr.ib(type=str)
+ """Name of the package"""
+
+ version = attr.ib(type=str)
+ """Current version"""
+
+ built_at = attr.ib(type=Optional[TimestampWithTimezone])
+ """Version build date"""
+
+ authors = attr.ib(type=List[Person])
+ """Authors"""
+
+
+class RubyGemsLoader(PackageLoader[RubyGemsPackageInfo]):
+ """Load ``.gem`` files from ``RubyGems.org`` into the SWH archive."""
+
+ visit_type = "rubygems"
+
+ def __init__(
+ self,
+ storage: StorageInterface,
+ url: str,
+ max_content_size: Optional[int] = None,
+ **kwargs,
+ ):
+ super().__init__(storage, url, max_content_size=max_content_size, **kwargs)
+ # Lister URLs are in the ``https://rubygems.org/gems/{pkgname}`` format
+ assert url.startswith("https://rubygems.org/gems/"), (
+ "Expected rubygems.org url, got '%s'" % url
+ )
+ self.gem_name = url[len("https://rubygems.org/gems/") :]
+ # API docs at ``https://guides.rubygems.org/rubygems-org-api/``
+ self.api_base_url = "https://rubygems.org/api/v1"
+ # Mapping of version number to corresponding metadata from the API
+ self.versions_info: Dict[str, Dict[str, Any]] = {}
+
+ def get_versions(self) -> Sequence[str]:
+ """Return all versions for the gem being loaded.
+
+ Also stores the detailed information for each version since everything
+ is present in this API call."""
+ versions_info = get_url_body(
+ f"{self.api_base_url}/versions/{self.gem_name}.json"
+ )
+ versions = []
+
+ for version_info in json.loads(versions_info):
+ number = version_info["number"]
+ self.versions_info[number] = version_info
+ versions.append(number)
+
+ return versions
+
+ @cached_method
+ def get_default_version(self) -> str:
+ latest = get_url_body(
+ f"{self.api_base_url}/versions/{self.gem_name}/latest.json"
+ )
+ return json.loads(latest)["version"]
+
+ def _load_directory(
+ self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], tmpdir: str
+ ) -> Tuple[str, from_disk.Directory]:
+ """Override the directory loading to point it to the actual code.
+
+ Gem files are uncompressed tarballs containing:
+ - ``metadata.gz``: the metadata about this gem
+ - ``data.tar.gz``: the code and possible binary artifacts
+ - ``checksums.yaml.gz``: checksums
+ """
+ logger.debug("Unpacking gem file to point to the actual code")
+ uncompressed_path = self.uncompress(dl_artifacts, dest=tmpdir)
+ source_code_tarball = os.path.join(uncompressed_path, "data.tar.gz")
+
+ return super()._load_directory([(source_code_tarball, {})], tmpdir)
+
+ def get_package_info(
+ self, version: str
+ ) -> Iterator[Tuple[str, RubyGemsPackageInfo]]:
+
+ info = self.versions_info[version]
+
+ authors = info["authors"].split(", ")
+ p_info = RubyGemsPackageInfo(
+ url=f"https://rubygems.org/downloads/{self.gem_name}-{version}.gem",
+ # See format of gem files in ``_load_directory``
+ filename=f"{self.gem_name}-{version}.tar",
+ version=version,
+ built_at=TimestampWithTimezone.from_iso8601(info["built_at"]),
+ name=self.gem_name,
+ authors=[Person.from_fullname(person.encode()) for person in authors],
+ )
+ yield release_name(version), p_info
+
+ def build_release(
+ self, p_info: RubyGemsPackageInfo, uncompressed_path: str, directory: Sha1Git
+ ) -> Optional[Release]:
+ msg = (
+ f"Synthetic release for RubyGems source package {p_info.name} "
+ f"version {p_info.version}\n"
+ )
+
+ return Release(
+ name=p_info.version.encode(),
+ message=msg.encode(),
+ date=p_info.built_at,
+ # TODO multiple authors (T3887)
+ author=p_info.authors[0],
+ target_type=ObjectType.DIRECTORY,
+ target=directory,
+ synthetic=True,
+ )
diff --git a/swh/loader/package/rubygems/tasks.py b/swh/loader/package/rubygems/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tasks.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.rubygems.loader import RubyGemsLoader
+
+
+@shared_task(name=__name__ + ".LoadRubyGems")
+def load_rubygems(**kwargs):
+ """Load ruby gems"""
+ return RubyGemsLoader.from_configfile(**kwargs).load()
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json
@@ -0,0 +1,36 @@
+[
+ {
+ "authors": "Fabio Neves",
+ "built_at": "2014-09-11T00:00:00.000Z",
+ "created_at": "2014-09-25T09:02:44.313Z",
+ "description": "A simple wrapper around HG command line tool",
+ "downloads_count": 2770,
+ "metadata": {},
+ "number": "0.8.5",
+ "summary": "Mercurial command line ruby wrapper",
+ "platform": "ruby",
+ "rubygems_version": "\u003e= 0",
+ "ruby_version": "\u003e= 0",
+ "prerelease": false,
+ "licenses": [],
+ "requirements": [],
+ "sha": "cee62e168ffd7d36c565e00f29fa6a0b57ef15c4c14055345b1e01148ec4fab8"
+ },
+ {
+ "authors": "Fabio Neves",
+ "built_at": "2014-09-11T00:00:00.000Z",
+ "created_at": "2014-09-18T08:59:42.895Z",
+ "description": "A simple wrapper around HG command line tool",
+ "downloads_count": 2415,
+ "metadata": {},
+ "number": "0.8.4",
+ "summary": "Mercurial command line ruby wrapper",
+ "platform": "ruby",
+ "rubygems_version": "\u003e= 0",
+ "ruby_version": "\u003e= 0",
+ "prerelease": false,
+ "licenses": [],
+ "requirements": [],
+ "sha": "ec60f0568f4f8744a0da78089a05e51d1c0e9799a1abfb37f63cdf7ed019c862"
+ }
+]
\ No newline at end of file
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json
@@ -0,0 +1 @@
+{"version":"0.8.5"}
\ No newline at end of file
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.4.gem b/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.4.gem
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.5.gem b/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.5.gem
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/package/rubygems/tests/test_rubygems.py b/swh/loader/package/rubygems/tests/test_rubygems.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/test_rubygems.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.loader.package.rubygems.loader import RubyGemsLoader
+from swh.loader.tests import get_stats
+
+
+def test_rubygems_loader(swh_storage, requests_mock_datadir):
+ url = "https://rubygems.org/gems/mercurial-wrapper"
+ loader = RubyGemsLoader(swh_storage, url)
+
+ assert loader.load()["status"] == "eventful"
+
+ stats = get_stats(swh_storage)
+ assert {
+ "content": 8,
+ "directory": 4,
+ "origin": 1,
+ "origin_visit": 1,
+ "release": 2,
+ "revision": 0,
+ "skipped_content": 0,
+ "snapshot": 1,
+ } == stats
diff --git a/swh/loader/package/rubygems/tests/test_tasks.py b/swh/loader/package/rubygems/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/test_tasks.py
@@ -0,0 +1,21 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_tasks_rubygems_loader(
+ mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+):
+ mock_load = mocker.patch("swh.loader.package.rubygems.loader.RubyGemsLoader.load")
+ mock_load.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.package.rubygems.tasks.LoadRubyGems",
+ kwargs={"url": "https://rubygems.org/gems/whatever-package"},
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+ assert mock_load.called
+ assert res.result == {"status": "eventful"}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 9:47 AM (18 h, 57 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219725
Attached To
D8569: Add rubygems loader
Event Timeline
Log In to Comment