diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -77,6 +77,15 @@
        ``i_version`` is the intrinsic version (eg. ``0.7.2-3``) while ``version``
        contains the debian suite name (eg. ``stretch/contrib/0.7.2-3``) and is
        passed as arg
+   * - golang
+     - ``p_info.​version``
+     - ``release_name(version)``
+     - =version
+     - Synthetic release for Golang source package {p_info.name} version {p_info.version}
+     - true
+     - ""
+     - from ext metadata
+     - Golang offers basically no metadata outside of version and timestamp
    * - deposit
      - HEAD
      - only HEAD
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -62,6 +62,7 @@
         loader.crates=swh.loader.package.crates:register
         loader.debian=swh.loader.package.debian:register
         loader.deposit=swh.loader.package.deposit:register
+        loader.golang=swh.loader.package.golang:register
         loader.nixguix=swh.loader.package.nixguix:register
         loader.npm=swh.loader.package.npm:register
         loader.opam=swh.loader.package.opam:register
diff --git a/swh/loader/package/golang/__init__.py b/swh/loader/package/golang/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/golang/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+    """Return the task modules and loader class registered for this package."""
+    from .loader import GolangLoader
+
+    return {
+        "task_modules": [f"{__name__}.tasks"],
+        "loader": GolangLoader,
+    }
diff --git a/swh/loader/package/golang/loader.py b/swh/loader/package/golang/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/golang/loader.py
@@ -0,0 +1,131 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import logging
+from typing import Iterator, Optional, Sequence, Tuple
+
+import attr
+
+from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.utils import EMPTY_AUTHOR, api_info, release_name
+from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
+from swh.storage.interface import StorageInterface
+
+logger = logging.getLogger(__name__)
+
+
+def _case_encode(text: str) -> str:
+    """Escape uppercase ASCII letters as the GOPROXY protocol requires.
+
+    The protocol case-encodes module paths and versions: each uppercase
+    letter is sent as ``!`` followed by its lowercase form, so
+    ``github.com/Azure`` becomes ``github.com/!azure``.
+    """
+    return "".join(
+        f"!{char.lower()}" if "A" <= char <= "Z" else char for char in text
+    )
+
+
+@attr.s
+class GolangPackageInfo(BasePackageInfo):
+    """Package info of a single Go module version."""
+
+    # Module path, taken from the origin URL (not case-encoded)
+    name = attr.ib(type=str)
+    # Parsed ``Time`` field of the proxy's ``.info`` document
+    timestamp = attr.ib(type=Optional[TimestampWithTimezone])
+
+
+class GolangLoader(PackageLoader[GolangPackageInfo]):
+    """Load Golang module zip file into SWH archive."""
+
+    visit_type = "golang"
+    GOLANG_PKG_DEV_URL = "https://pkg.go.dev"
+    GOLANG_PROXY_URL = "https://proxy.golang.org"
+
+    def __init__(
+        self,
+        storage: StorageInterface,
+        url: str,
+        max_content_size: Optional[int] = None,
+        **kwargs,
+    ):
+        """Translate the ``pkg.go.dev`` origin URL into a module proxy URL.
+
+        Raises:
+            ValueError: if ``url`` is not ``GOLANG_PKG_DEV_URL`` followed by
+                ``/`` and a module path.
+        """
+        super().__init__(storage, url, max_content_size=max_content_size, **kwargs)
+        # The lister saves human-usable URLs, so we translate them to proxy URLs
+        # for use in the loader.
+        # This URL format is detailed in https://go.dev/ref/mod#goproxy-protocol
+        prefix = f"{self.GOLANG_PKG_DEV_URL}/"
+        # Raise rather than assert (asserts vanish under ``python -O``), and
+        # match on the trailing slash so that look-alike hosts such as
+        # ``https://pkg.go.dev.example.org`` are rejected.
+        if not url.startswith(prefix) or url == prefix:
+            raise ValueError(
+                "Go package URL (%s) not from %s" % (url, self.GOLANG_PKG_DEV_URL)
+            )
+        self.name = url[len(prefix) :]
+        self.url = f"{self.GOLANG_PROXY_URL}/{_case_encode(self.name)}"
+
+    def get_versions(self) -> Sequence[str]:
+        """List the versions the proxy knows for this module.
+
+        ``@v/list`` is not meant to contain pseudo-versions and may be empty;
+        like the ``go`` command, fall back to the ``@latest`` version then.
+        """
+        listing = api_info(f"{self.url}/@v/list").decode()
+        versions = [line.strip() for line in listing.splitlines() if line.strip()]
+        if not versions:
+            versions = [self.get_default_version()]
+        return versions
+
+    def get_default_version(self) -> str:
+        """Return the ``Version`` field of the proxy's ``@latest`` document."""
+        latest = api_info(f"{self.url}/@latest")
+        return json.loads(latest)["Version"]
+
+    def _raw_info(self, version: str) -> dict:
+        """Fetch and decode the proxy's ``.info`` JSON document of ``version``."""
+        url = f"{self.url}/@v/{_case_encode(version)}.info"
+        return json.loads(api_info(url))
+
+    def get_package_info(self, version: str) -> Iterator[Tuple[str, GolangPackageInfo]]:
+        """Yield ``release_name(version)`` with the info of ``version``'s zip."""
+        # Encode the name because creating nested folders can become problematic
+        encoded_name = self.name.replace("/", "__")
+        filename = f"{encoded_name}-{version}.zip"
+        timestamp = TimestampWithTimezone.from_iso8601(self._raw_info(version)["Time"])
+        p_info = GolangPackageInfo(
+            url=f"{self.url}/@v/{_case_encode(version)}.zip",
+            filename=filename,
+            version=version,
+            timestamp=timestamp,
+            name=self.name,
+        )
+        yield release_name(version), p_info
+
+    def build_release(
+        self, p_info: GolangPackageInfo, uncompressed_path: str, directory: Sha1Git
+    ) -> Optional[Release]:
+        """Build the synthetic release targeting ``directory`` for ``p_info``."""
+        msg = (
+            f"Synthetic release for Golang source package {p_info.name} "
+            f"version {p_info.version}\n"
+        )
+
+        return Release(
+            name=p_info.version.encode(),
+            message=msg.encode(),
+            date=p_info.timestamp,
+            author=EMPTY_AUTHOR,  # Go modules offer very little metadata
+            target_type=ObjectType.DIRECTORY,
+            target=directory,
+            synthetic=True,
+        )
diff --git a/swh/loader/package/golang/tasks.py b/swh/loader/package/golang/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/golang/tasks.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.golang.loader import GolangLoader
+
+
+@shared_task(name=__name__ + ".LoadGolang")
+def load_golang(**kwargs):
+    """Load a Golang module; ``kwargs`` go to ``GolangLoader.from_configfile``."""
+    loader = GolangLoader.from_configfile(**kwargs)
+    return loader.load()
diff --git a/swh/loader/package/golang/tests/__init__.py b/swh/loader/package/golang/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@latest b/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@latest
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@latest
@@ -0,0 +1 @@
+{"Version":"v0.1.3","Time":"2022-03-15T13:54:34Z"}
diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_list b/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_list
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_list
@@ -0,0 +1 @@
+v0.1.3
diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_v0.1.3.info b/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_v0.1.3.info
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_v0.1.3.info
@@ -0,0 +1 @@
+{"Version":"v0.1.3","Time":"2022-03-17T15:42:55Z"}
diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_v0.1.3.zip b/swh/loader/package/golang/tests/data/https_proxy.golang.org/example.com_basic-go-module_@v_v0.1.3.zip
new file mode 100644
index
0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@