diff --git a/swh/loader/package/golang/loader.py b/swh/loader/package/golang/loader.py index 9caff6a..d29445d 100644 --- a/swh/loader/package/golang/loader.py +++ b/swh/loader/package/golang/loader.py @@ -1,91 +1,97 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import logging +import re from typing import Iterator, Optional, Sequence, Tuple import attr from swh.loader.package.loader import BasePackageInfo, PackageLoader from swh.loader.package.utils import EMPTY_AUTHOR, api_info, release_name from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone from swh.storage.interface import StorageInterface logger = logging.getLogger(__name__) +def _uppercase_encode(url: str) -> str: + return re.sub("([A-Z]{1})", r"!\1", url).lower() + + @attr.s class GolangPackageInfo(BasePackageInfo): name = attr.ib(type=str) timestamp = attr.ib(type=Optional[TimestampWithTimezone]) class GolangLoader(PackageLoader[GolangPackageInfo]): """Load Golang module zip file into SWH archive.""" visit_type = "golang" GOLANG_PKG_DEV_URL = "https://pkg.go.dev" GOLANG_PROXY_URL = "https://proxy.golang.org" def __init__( self, storage: StorageInterface, url: str, max_content_size: Optional[int] = None, **kwargs, ): super().__init__(storage, url, max_content_size=max_content_size, **kwargs) # The lister saves human-usable URLs, so we translate them to proxy URLs # for use in the loader. # This URL format is detailed in https://go.dev/ref/mod#goproxy-protocol assert url.startswith( self.GOLANG_PKG_DEV_URL ), "Go package URL (%s) not from %s" % (url, self.GOLANG_PKG_DEV_URL) self.name = url[len(self.GOLANG_PKG_DEV_URL) + 1 :] self.url = url.replace(self.GOLANG_PKG_DEV_URL, self.GOLANG_PROXY_URL) + self.url = _uppercase_encode(self.url) def get_versions(self) -> Sequence[str]: return api_info(f"{self.url}/@v/list").decode().splitlines() def get_default_version(self) -> str: latest = api_info(f"{self.url}/@latest") return json.loads(latest)["Version"] def _raw_info(self, version: str) -> dict: - url = f"{self.url}/@v/{version}.info" + url = f"{self.url}/@v/{_uppercase_encode(version)}.info" return json.loads(api_info(url)) def get_package_info(self, version: str) -> Iterator[Tuple[str, GolangPackageInfo]]: # Encode the name because creating nested folders can become problematic encoded_name = self.name.replace("/", "__") filename = f"{encoded_name}-{version}.zip" timestamp = TimestampWithTimezone.from_iso8601(self._raw_info(version)["Time"]) p_info = GolangPackageInfo( url=f"{self.url}/@v/{version}.zip", filename=filename, version=version, timestamp=timestamp, name=self.name, ) yield release_name(version), p_info def build_release( self, p_info: GolangPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: msg = ( f"Synthetic release for Golang source package {p_info.name} " f"version {p_info.version}\n" ) return Release( name=p_info.version.encode(), message=msg.encode(), date=p_info.timestamp, author=EMPTY_AUTHOR, # Go modules offer very little metadata target_type=ObjectType.DIRECTORY, target=directory, synthetic=True, ) diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@latest b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@latest new file mode 100644 index 0000000..cc87e10 --- /dev/null +++ b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@latest @@ -0,0 +1 @@ +{"Version":"v1.0.1","Time":"2022-03-23T18:02:43Z"} \ No newline at end of file diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_list b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_list new file mode 100644 index 0000000..b18d465 --- /dev/null +++ b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_list @@ -0,0 +1 @@ +v1.0.1 diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_v1.0.1.info b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_v1.0.1.info new file mode 100644 index 0000000..cc87e10 --- /dev/null +++ b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_v1.0.1.info @@ -0,0 +1 @@ +{"Version":"v1.0.1","Time":"2022-03-23T18:02:43Z"} \ No newline at end of file diff --git a/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_v1.0.1.zip b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_v1.0.1.zip new file mode 100644 index 0000000..8fe5583 Binary files /dev/null and b/swh/loader/package/golang/tests/data/https_proxy.golang.org/github.com_adam-hanna_array!operations_@v_v1.0.1.zip differ diff --git a/swh/loader/package/golang/tests/test_golang.py b/swh/loader/package/golang/tests/test_golang.py index 63bde1b..1249807 100644 --- a/swh/loader/package/golang/tests/test_golang.py +++ b/swh/loader/package/golang/tests/test_golang.py @@ -1,13 +1,22 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.loader.package.golang.loader import GolangLoader def test_golang_loader_first_visit(swh_storage, requests_mock_datadir): url = "https://pkg.go.dev/example.com/basic-go-module" loader = GolangLoader(swh_storage, url) assert loader.load()["status"] == "eventful" + + +def test_golang_loader_package_name_with_uppercase_characters( + swh_storage, requests_mock_datadir +): + url = "https://pkg.go.dev/github.com/adam-hanna/arrayOperations" + loader = GolangLoader(swh_storage, url) + + assert loader.load()["status"] == "eventful"