diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -176,6 +176,15 @@
      - from int metadata or ""
      - from ext metadata or None
      - metadata is intrinsic
+   * - rubygems
+     - ``p_info.version``
+     - ``release_name(​version)``
+     - =version
+     - Synthetic release for RubyGems source package {p_info.name} version {p_info.version}
+     - true
+     - from ext metadata
+     - from ext metadata
+     - The source code is extracted from a tarball nested within the gem file
 
 using this function::
 
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@
         loader.puppet=swh.loader.package.puppet:register
         loader.pypi=swh.loader.package.pypi:register
         loader.maven=swh.loader.package.maven:register
+        loader.rubygems=swh.loader.package.rubygems:register
     """,
     classifiers=[
         "Programming Language :: Python :: 3",
diff --git a/swh/loader/package/rubygems/__init__.py b/swh/loader/package/rubygems/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+    """Register the current worker module's definition"""
+    from .loader import RubyGemsLoader
+
+    return {
+        "task_modules": [f"{__name__}.tasks"],
+        "loader": RubyGemsLoader,
+    }
diff --git a/swh/loader/package/rubygems/loader.py b/swh/loader/package/rubygems/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/loader.py
@@ -0,0 +1,166 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import logging
+import os
+from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple
+
+import attr
+
+from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.utils import cached_method, get_url_body, release_name
+from swh.model import from_disk
+from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
+from swh.storage.interface import StorageInterface
+
+logger = logging.getLogger(__name__)
+
+# Lister URLs are in the ``https://rubygems.org/gems/{pkgname}`` format
+RUBYGEMS_URL_PREFIX = "https://rubygems.org/gems/"
+
+
+@attr.s
+class RubyGemsPackageInfo(BasePackageInfo):
+    """Information needed to load one version of a gem."""
+
+    name = attr.ib(type=str)
+    """Name of the package"""
+
+    version = attr.ib(type=str)
+    """Current version"""
+
+    built_at = attr.ib(type=Optional[TimestampWithTimezone])
+    """Version build date"""
+
+    authors = attr.ib(type=List[Person])
+    """Authors"""
+
+
+class RubyGemsLoader(PackageLoader[RubyGemsPackageInfo]):
+    """Load ``.gem`` files from ``RubyGems.org`` into the SWH archive."""
+
+    visit_type = "rubygems"
+
+    def __init__(
+        self,
+        storage: StorageInterface,
+        url: str,
+        max_content_size: Optional[int] = None,
+        **kwargs,
+    ):
+        """Set up a loader for the gem designated by ``url``.
+
+        Args:
+            storage: forwarded to the parent loader
+            url: origin URL, ``https://rubygems.org/gems/{pkgname}``
+            max_content_size: forwarded to the parent loader
+            **kwargs: forwarded to the parent loader
+
+        Raises:
+            ValueError: if ``url`` is not a ``rubygems.org`` gem URL
+        """
+        super().__init__(storage, url, max_content_size=max_content_size, **kwargs)
+        # Explicit raise instead of ``assert``: assertions are stripped under
+        # ``python -O``, which would let a bogus gem name through.
+        if not url.startswith(RUBYGEMS_URL_PREFIX):
+            raise ValueError("Expected rubygems.org url, got '%s'" % url)
+        self.gem_name = url[len(RUBYGEMS_URL_PREFIX) :]
+        # API docs at ``https://guides.rubygems.org/rubygems-org-api/``
+        self.api_base_url = "https://rubygems.org/api/v1"
+        # Mapping of version number to corresponding metadata from the API
+        self.versions_info: Dict[str, Dict[str, Any]] = {}
+
+    def get_versions(self) -> Sequence[str]:
+        """Return all versions for the gem being loaded.
+
+        Also stores the detailed information for each version since everything
+        is present in this API call."""
+        body = get_url_body(f"{self.api_base_url}/versions/{self.gem_name}.json")
+        versions = []
+
+        for version_info in json.loads(body):
+            number = version_info["number"]
+            self.versions_info[number] = version_info
+            versions.append(number)
+
+        return versions
+
+    @cached_method
+    def get_default_version(self) -> str:
+        """Return the latest version number reported by the API."""
+        latest = get_url_body(
+            f"{self.api_base_url}/versions/{self.gem_name}/latest.json"
+        )
+        return json.loads(latest)["version"]
+
+    def _load_directory(
+        self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], tmpdir: str
+    ) -> Tuple[str, from_disk.Directory]:
+        """Override the directory loading to point it to the actual code.
+
+        Gem files are uncompressed tarballs containing:
+            - ``metadata.gz``: the metadata about this gem
+            - ``data.tar.gz``: the code and possible binary artifacts
+            - ``checksums.yaml.gz``: checksums
+        """
+        logger.debug("Unpacking gem file to point to the actual code")
+        uncompressed_path = self.uncompress(dl_artifacts, dest=tmpdir)
+        source_code_tarball = os.path.join(uncompressed_path, "data.tar.gz")
+
+        return super()._load_directory([(source_code_tarball, {})], tmpdir)
+
+    def get_package_info(
+        self, version: str
+    ) -> Iterator[Tuple[str, RubyGemsPackageInfo]]:
+        """Yield ``(release_name(version), package info)`` for ``version``.
+
+        Raises:
+            KeyError: if the API did not list ``version`` for this gem
+        """
+        if not self.versions_info:
+            # The metadata is only populated as a side effect of ``get_versions``
+            self.get_versions()
+
+        info = self.versions_info[version]
+
+        # ``built_at`` is declared optional on the package info: do not try to
+        # parse a missing or null date
+        built_at = info.get("built_at")
+        # ``str.split`` always returns at least one item, so ``authors`` is never
+        # empty, even when the API gives no author
+        authors = (info.get("authors") or "").split(", ")
+        p_info = RubyGemsPackageInfo(
+            url=f"https://rubygems.org/downloads/{self.gem_name}-{version}.gem",
+            # See format of gem files in ``_load_directory``
+            filename=f"{self.gem_name}-{version}.tar",
+            version=version,
+            built_at=(
+                TimestampWithTimezone.from_iso8601(built_at) if built_at else None
+            ),
+            name=self.gem_name,
+            authors=[Person.from_fullname(person.encode()) for person in authors],
+        )
+        yield release_name(version), p_info
+
+    def build_release(
+        self, p_info: RubyGemsPackageInfo, uncompressed_path: str, directory: Sha1Git
+    ) -> Optional[Release]:
+        """Build the synthetic release pointing at ``directory``."""
+        msg = (
+            f"Synthetic release for RubyGems source package {p_info.name} "
+            f"version {p_info.version}\n"
+        )
+
+        return Release(
+            name=p_info.version.encode(),
+            message=msg.encode(),
+            date=p_info.built_at,
+            # TODO multiple authors (T3887)
+            author=p_info.authors[0],
+            target_type=ObjectType.DIRECTORY,
+            target=directory,
+            synthetic=True,
+        )
diff --git a/swh/loader/package/rubygems/tasks.py b/swh/loader/package/rubygems/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tasks.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.rubygems.loader import RubyGemsLoader
+
+
+@shared_task(name=__name__ + ".LoadRubyGems")
+def load_rubygems(**kwargs):
+    """Load ruby gems"""
+    return RubyGemsLoader.from_configfile(**kwargs).load()
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper.json
@@ -0,0 +1,36 @@
+[
+  {
+    "authors": "Fabio Neves",
+    "built_at": "2014-09-11T00:00:00.000Z",
+    "created_at": "2014-09-25T09:02:44.313Z",
+    "description": "A simple wrapper around HG command line tool",
+    "downloads_count": 2770,
+    "metadata": {},
+    "number": "0.8.5",
+    "summary": "Mercurial command line ruby wrapper",
+    "platform": "ruby",
+    "rubygems_version": "\u003e= 0",
+    "ruby_version": "\u003e= 0",
+    "prerelease": false,
+    "licenses": [],
+    "requirements": [],
+    "sha": "cee62e168ffd7d36c565e00f29fa6a0b57ef15c4c14055345b1e01148ec4fab8"
+  },
+  {
+    "authors": "Fabio Neves",
+    "built_at": "2014-09-11T00:00:00.000Z",
+    "created_at": "2014-09-18T08:59:42.895Z",
+    "description": "A simple wrapper around HG command line tool",
+    "downloads_count": 2415,
+    "metadata": {},
+    "number": "0.8.4",
+    "summary": "Mercurial command line ruby wrapper",
+    "platform": "ruby",
+    "rubygems_version": "\u003e= 0",
+    "ruby_version": "\u003e= 0",
+    "prerelease": false,
+    "licenses": [],
+    "requirements": [],
+    "sha": "ec60f0568f4f8744a0da78089a05e51d1c0e9799a1abfb37f63cdf7ed019c862"
+  }
+]
\ No newline at end of file
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/rubygems/tests/data/https_rubygems.org/api_v1_versions_mercurial-wrapper_latest.json
@@ -0,0 +1 @@
+{"version":"0.8.5"}
\ No newline at end of file
diff --git a/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.4.gem b/swh/loader/package/rubygems/tests/data/https_rubygems.org/downloads_mercurial-wrapper-0.8.4.gem
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@