Page MenuHomeSoftware Heritage

D6771.id.diff
No OneTemporary

D6771.id.diff

diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py
--- a/swh/loader/package/maven/loader.py
+++ b/swh/loader/package/maven/loader.py
@@ -9,7 +9,6 @@
from os import path
import string
from typing import Iterator, List, Optional, Sequence, Tuple
-from urllib.parse import urlparse
import attr
import iso8601
@@ -58,6 +57,9 @@
version: str
"""artifact's version"""
+ base_url: str
+ """root URL of the Maven instance"""
+
@attr.s
class MavenPackageInfo(BasePackageInfo):
@@ -69,6 +71,8 @@
"""Artifact ID of the maven artifact"""
version = attr.ib(type=str)
"""Version of the maven artifact"""
+ base_url = attr.ib(type=str)
+ """Root URL of the Maven instance"""
# default format for maven artifacts
MANIFEST_FORMAT = string.Template("$gid $aid $version $url $time")
@@ -87,6 +91,7 @@
gid=a_metadata["gid"],
aid=a_metadata["aid"],
version=a_metadata["version"],
+ base_url=a_metadata["base_url"],
directory_extrinsic_metadata=[
RawExtrinsicMetadataCore(
format="maven-json", metadata=json.dumps(a_metadata).encode(),
@@ -126,6 +131,20 @@
jar["version"]: jar for jar in artifacts if jar["version"]
}
+ if artifacts:
+ base_urls = {jar["base_url"] for jar in artifacts}
+ try:
+ (self.base_url,) = base_urls
+ except ValueError:
+ raise ValueError(
+ "Artifacts originate from more than one Maven instance: "
+ + ", ".join(base_urls)
+ ) from None
+ else:
+ # There is no artifact, so self.get_metadata_authority() won't be called,
+ # so self.base_url won't be accessed.
+ pass
+
def get_versions(self) -> Sequence[str]:
return list(self.version_artifact)
@@ -134,12 +153,7 @@
return self.artifacts[-1]["version"]
def get_metadata_authority(self):
- p_url = urlparse(self.url)
- return MetadataAuthority(
- type=MetadataAuthorityType.FORGE,
- url=f"{p_url.scheme}://{p_url.netloc}/",
- metadata={},
- )
+ return MetadataAuthority(type=MetadataAuthorityType.FORGE, url=self.base_url)
def build_extrinsic_directory_metadata(
self, p_info: MavenPackageInfo, release_id: Sha1Git, directory_id: Sha1Git,
@@ -172,7 +186,6 @@
self, p_info: MavenPackageInfo, uncompressed_path: str, directory: Sha1Git
) -> Optional[Release]:
msg = f"Synthetic release for archive at {p_info.url}\n".encode("utf-8")
- # time is an iso8601 date
normalized_time = TimestampWithTimezone.from_datetime(p_info.time)
return Release(
name=p_info.version.encode(),
diff --git a/swh/loader/package/maven/tests/test_maven.py b/swh/loader/package/maven/tests/test_maven.py
--- a/swh/loader/package/maven/tests/test_maven.py
+++ b/swh/loader/package/maven/tests/test_maven.py
@@ -38,6 +38,7 @@
"aid": "sprova4j",
"filename": "sprova4j-0.1.0-sources.jar",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
},
{
"time": "2021-07-12 19:37:05.534000",
@@ -47,6 +48,7 @@
"aid": "sprova4j",
"filename": "sprova4j-0.1.1-sources.jar",
"version": "0.1.1",
+ "base_url": "https://repo1.maven.org/maven2/",
},
]
@@ -94,6 +96,7 @@
"aid": "sprova4j",
"filename": "sprova4j-0.1.0-sources.jar",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
}
_expected_pom_metadata = (
"""<?xml version="1.0" encoding="UTF-8"?>
@@ -255,6 +258,7 @@
"gid": "al/aldi",
"aid": "sprova4j",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
}
],
)
@@ -284,6 +288,23 @@
} == stats
+def test_jar_visit_inconsistent_base_url(
+ swh_storage, requests_mock, data_jar_1, data_pom_1
+):
+ """Loading artifacts whose base_url values differ raises a ValueError
+
+ """
+ with pytest.raises(ValueError, match="more than one Maven instance"):
+ MavenLoader(
+ swh_storage,
+ MVN_ARTIFACTS[0]["url"],
+ artifacts=[
+ MVN_ARTIFACTS[0],
+ {**MVN_ARTIFACTS[1], "base_url": "http://maven.example/"},
+ ],
+ )
+
+
def test_jar_visit_with_release_artifact_no_prior_visit(
swh_storage, requests_mock, data_jar_1, data_pom_1
):
@@ -438,7 +459,7 @@
object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
)
metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/",
+ type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/maven2/",
)
expected_metadata = [
@@ -497,7 +518,7 @@
object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
)
metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/",
+ type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/maven2/",
)
expected_metadata = [
diff --git a/swh/loader/package/maven/tests/test_tasks.py b/swh/loader/package/maven/tests/test_tasks.py
--- a/swh/loader/package/maven/tests/test_tasks.py
+++ b/swh/loader/package/maven/tests/test_tasks.py
@@ -12,11 +12,12 @@
"aid": "sprova4j",
"filename": "sprova4j-0.1.0.jar",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
},
]
-def test_tasks_jar_loader(
+def test_tasks_maven_loader(
mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
):
mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load")
@@ -33,7 +34,7 @@
assert res.result == {"status": "eventful"}
-def test_tasks_jar_loader_snapshot_append(
+def test_tasks_maven_loader_snapshot_append(
mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
):
mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load")

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 23, 1:48 AM (19 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223815

Event Timeline