diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py --- a/swh/loader/package/maven/loader.py +++ b/swh/loader/package/maven/loader.py @@ -9,7 +9,6 @@ from os import path import string from typing import Iterator, List, Optional, Sequence, Tuple -from urllib.parse import urlparse import attr import iso8601 @@ -58,6 +57,9 @@ version: str """artifact's version""" + base_url: str + """root URL of the Maven instance""" + @attr.s class MavenPackageInfo(BasePackageInfo): @@ -69,6 +71,8 @@ """Artifact ID of the maven artifact""" version = attr.ib(type=str) """Version of the maven artifact""" + base_url = attr.ib(type=str) + """Root URL of the Maven instance""" # default format for maven artifacts MANIFEST_FORMAT = string.Template("$gid $aid $version $url $time") @@ -87,6 +91,7 @@ gid=a_metadata["gid"], aid=a_metadata["aid"], version=a_metadata["version"], + base_url=a_metadata["base_url"], directory_extrinsic_metadata=[ RawExtrinsicMetadataCore( format="maven-json", metadata=json.dumps(a_metadata).encode(), @@ -126,6 +131,20 @@ jar["version"]: jar for jar in artifacts if jar["version"] } + if artifacts: + base_urls = {jar["base_url"] for jar in artifacts} + try: + (self.base_url,) = base_urls + except ValueError: + raise ValueError( + "Artifacts originate from more than one Maven instance: " + + ", ".join(base_urls) + ) from None + else: + # There is no artifact, so self.metadata_authority won't be called, + # so self.base_url won't be accessed. + pass + def get_versions(self) -> Sequence[str]: return list(self.version_artifact) @@ -134,12 +153,7 @@ return self.artifacts[-1]["version"] def get_metadata_authority(self): - p_url = urlparse(self.url) - return MetadataAuthority( - type=MetadataAuthorityType.FORGE, - url=f"{p_url.scheme}://{p_url.netloc}/", - metadata={}, - ) + return MetadataAuthority(type=MetadataAuthorityType.FORGE, url=self.base_url) def build_extrinsic_directory_metadata( self, p_info: MavenPackageInfo, release_id: Sha1Git, directory_id: Sha1Git, @@ -172,7 +186,6 @@ self, p_info: MavenPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: msg = f"Synthetic release for archive at {p_info.url}\n".encode("utf-8") - # time is an iso8601 date normalized_time = TimestampWithTimezone.from_datetime(p_info.time) return Release( name=p_info.version.encode(), diff --git a/swh/loader/package/maven/tests/test_maven.py b/swh/loader/package/maven/tests/test_maven.py --- a/swh/loader/package/maven/tests/test_maven.py +++ b/swh/loader/package/maven/tests/test_maven.py @@ -38,6 +38,7 @@ "aid": "sprova4j", "filename": "sprova4j-0.1.0-sources.jar", "version": "0.1.0", + "base_url": "https://repo1.maven.org/maven2/", }, { "time": "2021-07-12 19:37:05.534000", @@ -47,6 +48,7 @@ "aid": "sprova4j", "filename": "sprova4j-0.1.1-sources.jar", "version": "0.1.1", + "base_url": "https://repo1.maven.org/maven2/", }, ] @@ -94,6 +96,7 @@ "aid": "sprova4j", "filename": "sprova4j-0.1.0-sources.jar", "version": "0.1.0", + "base_url": "https://repo1.maven.org/maven2/", } _expected_pom_metadata = ( """ @@ -255,6 +258,7 @@ "gid": "al/aldi", "aid": "sprova4j", "version": "0.1.0", + "base_url": "https://repo1.maven.org/maven2/", } ], ) @@ -438,7 +442,7 @@ object_type=ExtendedObjectType.DIRECTORY, object_id=release.target ) metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/", + type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/maven2/", ) expected_metadata = [ @@ -497,7 +501,7 @@ object_type=ExtendedObjectType.DIRECTORY, object_id=release.target ) metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/", + type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/maven2/", ) expected_metadata = [ diff --git a/swh/loader/package/maven/tests/test_tasks.py b/swh/loader/package/maven/tests/test_tasks.py --- a/swh/loader/package/maven/tests/test_tasks.py +++ b/swh/loader/package/maven/tests/test_tasks.py @@ -12,11 +12,12 @@ "aid": "sprova4j", "filename": "sprova4j-0.1.0.jar", "version": "0.1.0", + "base_url": "https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.0/", }, ] -def test_tasks_jar_loader( +def test_tasks_maven_loader( mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config ): mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load") @@ -33,7 +34,7 @@ assert res.result == {"status": "eventful"} -def test_tasks_jar_loader_snapshot_append( +def test_tasks_maven_loader_snapshot_append( mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config ): mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load")