Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py
index cffe05b..f2403d0 100644
--- a/swh/loader/package/maven/loader.py
+++ b/swh/loader/package/maven/loader.py
@@ -1,185 +1,198 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timezone
import json
import logging
from os import path
import string
from typing import Iterator, List, Optional, Sequence, Tuple
-from urllib.parse import urlparse
import attr
import iso8601
import requests
from typing_extensions import TypedDict
from swh.loader.package.loader import (
BasePackageInfo,
PackageLoader,
RawExtrinsicMetadataCore,
)
from swh.loader.package.utils import EMPTY_AUTHOR, release_name
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
ObjectType,
RawExtrinsicMetadata,
Release,
Sha1Git,
TimestampWithTimezone,
)
from swh.storage.interface import StorageInterface
logger = logging.getLogger(__name__)
class ArtifactDict(TypedDict):
"""Data about a Maven artifact, passed by the Maven Lister."""
time: str
"""the time of the last update of jar file on the server as an iso8601 date string
"""
url: str
"""the artifact url to retrieve filename"""
filename: Optional[str]
"""optionally, the file's name"""
gid: str
"""artifact's groupId"""
aid: str
"""artifact's artifactId"""
version: str
"""artifact's version"""
+ base_url: str
+ """root URL of the Maven instance"""
+
@attr.s
class MavenPackageInfo(BasePackageInfo):
time = attr.ib(type=datetime)
"""Timestamp of the last update of jar file on the server."""
gid = attr.ib(type=str)
"""Group ID of the maven artifact"""
aid = attr.ib(type=str)
"""Artifact ID of the maven artifact"""
version = attr.ib(type=str)
"""Version of the maven artifact"""
+ base_url = attr.ib(type=str)
+ """Root URL of the Maven instance"""
# default format for maven artifacts
MANIFEST_FORMAT = string.Template("$gid $aid $version $url $time")
EXTID_TYPE = "maven-jar"
EXTID_VERSION = 0
@classmethod
def from_metadata(cls, a_metadata: ArtifactDict) -> "MavenPackageInfo":
url = a_metadata["url"]
time = iso8601.parse_date(a_metadata["time"]).astimezone(tz=timezone.utc)
return cls(
url=url,
filename=a_metadata.get("filename") or path.split(url)[-1],
time=time,
gid=a_metadata["gid"],
aid=a_metadata["aid"],
version=a_metadata["version"],
+ base_url=a_metadata["base_url"],
directory_extrinsic_metadata=[
RawExtrinsicMetadataCore(
format="maven-json", metadata=json.dumps(a_metadata).encode(),
),
],
)
class MavenLoader(PackageLoader[MavenPackageInfo]):
"""Load source code jar origin's artifact files into swh archive
"""
visit_type = "maven"
def __init__(
self,
storage: StorageInterface,
url: str,
artifacts: Sequence[ArtifactDict],
max_content_size: Optional[int] = None,
):
"""Loader constructor.
For now, this is the lister's task output.
There is one, and only one, artefact (jar or zip) per version, as guaranteed by
the Maven coordinates system.
Args:
url: Origin url
artifacts: List of single artifact information
"""
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
self.artifacts = artifacts # assume order is enforced in the lister
self.version_artifact = {
jar["version"]: jar for jar in artifacts if jar["version"]
}
+ if artifacts:
+ base_urls = {jar["base_url"] for jar in artifacts}
+ try:
+ (self.base_url,) = base_urls
+ except ValueError:
+ raise ValueError(
+ "Artifacts originate from more than one Maven instance: "
+ + ", ".join(base_urls)
+ ) from None
+ else:
+ # There is no artifact, so self.get_metadata_authority() won't be called,
+ # so self.base_url won't be accessed.
+ pass
+
def get_versions(self) -> Sequence[str]:
return list(self.version_artifact)
def get_default_version(self) -> str:
# Default version is the last item
return self.artifacts[-1]["version"]
def get_metadata_authority(self):
- p_url = urlparse(self.url)
- return MetadataAuthority(
- type=MetadataAuthorityType.FORGE,
- url=f"{p_url.scheme}://{p_url.netloc}/",
- metadata={},
- )
+ return MetadataAuthority(type=MetadataAuthorityType.FORGE, url=self.base_url)
def build_extrinsic_directory_metadata(
self, p_info: MavenPackageInfo, release_id: Sha1Git, directory_id: Sha1Git,
) -> List[RawExtrinsicMetadata]:
# Rebuild POM URL.
pom_url = path.dirname(p_info.url)
pom_url = f"{pom_url}/{p_info.aid}-{p_info.version}.pom"
r = requests.get(pom_url, allow_redirects=True)
if r.status_code == 200:
metadata_pom = r.content
else:
metadata_pom = b""
p_info.directory_extrinsic_metadata.append(
RawExtrinsicMetadataCore(format="maven-pom", metadata=metadata_pom,)
)
return super().build_extrinsic_directory_metadata(
p_info=p_info, release_id=release_id, directory_id=directory_id,
)
def get_package_info(self, version: str) -> Iterator[Tuple[str, MavenPackageInfo]]:
a_metadata = self.version_artifact[version]
yield release_name(a_metadata["version"]), MavenPackageInfo.from_metadata(
a_metadata
)
def build_release(
self, p_info: MavenPackageInfo, uncompressed_path: str, directory: Sha1Git
) -> Optional[Release]:
msg = f"Synthetic release for archive at {p_info.url}\n".encode("utf-8")
- # time is an iso8601 date
normalized_time = TimestampWithTimezone.from_datetime(p_info.time)
return Release(
name=p_info.version.encode(),
message=msg,
date=normalized_time,
author=EMPTY_AUTHOR,
target=directory,
target_type=ObjectType.DIRECTORY,
synthetic=True,
)
diff --git a/swh/loader/package/maven/tests/test_maven.py b/swh/loader/package/maven/tests/test_maven.py
index ab5cc43..f5fce3a 100644
--- a/swh/loader/package/maven/tests/test_maven.py
+++ b/swh/loader/package/maven/tests/test_maven.py
@@ -1,596 +1,617 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import hashlib
import json
from pathlib import Path
import pytest
from swh.loader.package import __version__
from swh.loader.package.maven.loader import MavenLoader, MavenPackageInfo
from swh.loader.package.utils import EMPTY_AUTHOR
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
RawExtrinsicMetadata,
Release,
Snapshot,
SnapshotBranch,
TargetType,
Timestamp,
TimestampWithTimezone,
)
from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher
from swh.model.model import ObjectType as ModelObjectType
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType
from swh.storage.algos.snapshot import snapshot_get_all_branches
URL = "https://repo1.maven.org/maven2/"
MVN_ARTIFACTS = [
{
"time": "2021-07-12 19:06:59.335000",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.0/"
+ "sprova4j-0.1.0-sources.jar",
"gid": "al.aldi",
"aid": "sprova4j",
"filename": "sprova4j-0.1.0-sources.jar",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
},
{
"time": "2021-07-12 19:37:05.534000",
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.1/"
+ "sprova4j-0.1.1-sources.jar",
"gid": "al.aldi",
"aid": "sprova4j",
"filename": "sprova4j-0.1.1-sources.jar",
"version": "0.1.1",
+ "base_url": "https://repo1.maven.org/maven2/",
},
]
MVN_ARTIFACTS_POM = [
"https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom",
"https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom",
]
_expected_new_contents_first_visit = [
"cd807364cd7730022b3849f90ccf4bababbada84",
"79e33dd52ebdf615e6696ae69add91cb990d81e2",
"8002bd514156f05a0940ae14ef86eb0179cbd510",
"23479553a6ccec30d377dee0496123a65d23fd8c",
"07ffbebb933bc1660e448f07d8196c2b083797f9",
"abf021b581f80035b56153c9aa27195b8d7ebbb8",
"eec70ba80a6862ed2619727663b17eb0d9dfe131",
"81a493dacb44dedf623f29ecf62c0e035bf698de",
"bda85ed0bbecf8cddfea04234bee16f476f64fe4",
"1ec91d561f5bdf59acb417086e04c54ead94e94e",
"d517b423da707fa21378623f35facebff53cb59d",
"3f0f21a764972d79e583908991c893c999613354",
"a2dd4d7dfe6043baf9619081e4e29966989211af",
"f62685cf0c6825a4097c949280b584cf0e16d047",
"56afc1ea60cef6548ce0a34f44e91b0e4b063835",
"cf7c740926e7ebc9ac8978a5c4f0e1e7a0e9e3af",
"86ff828bea1c22ca3d50ed82569b9c59ce2c41a1",
"1d0fa04454d9fec31d8ee3f35b58158ca1e28b15",
"e90239a2c8d9ede61a29671a8b397a743e18fa34",
"ce8851005d084aea089bcd8cf01052f4b234a823",
"2c34ce622aa7fa68d104900840f66671718e6249",
"e6a6fec32dcb3bee93c34fc11b0174a6b0b0ec6d",
"405d3e1be4b658bf26de37f2c90c597b2796b9d7",
"d0d2f5848721e04300e537826ef7d2d6d9441df0",
"399c67e33e38c475fd724d283dd340f6a2e8dc91",
"dea10c1111cc61ac1809fb7e88857e3db054959f",
]
_expected_json_metadata = {
"time": "2021-07-12 19:06:59.335000",
"url": (
"https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.0/"
"sprova4j-0.1.0-sources.jar"
),
"gid": "al.aldi",
"aid": "sprova4j",
"filename": "sprova4j-0.1.0-sources.jar",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
}
_expected_pom_metadata = (
"""<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 """
'http://maven.apache.org/xsd/maven-4.0.0.xsd" '
'xmlns="http://maven.apache.org/POM/4.0.0" '
"""xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<groupId>al.aldi</groupId>
<artifactId>sprova4j</artifactId>
<version>0.1.0</version>
<name>sprova4j</name>
<description>Java client for Sprova Test Management</description>
<url>https://github.com/aldialimucaj/sprova4j</url>
<inceptionYear>2018</inceptionYear>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<developers>
<developer>
<id>aldi</id>
<name>Aldi Alimucaj</name>
<email>aldi.alimucaj@gmail.com</email>
</developer>
</developers>
<scm>
<connection>scm:git:git://github.com/aldialimucaj/sprova4j.git</connection>
<developerConnection>scm:git:git://github.com/aldialimucaj/sprova4j.git</developerConnection>
<url>https://github.com/aldialimucaj/sprova4j</url>
</scm>
<dependencies>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.3</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.3</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.10.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.squareup.okio</groupId>
<artifactId>okio</artifactId>
<version>1.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.json</artifactId>
<version>1.1.2</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>javax.json</groupId>
<artifactId>javax.json-api</artifactId>
<version>1.1.2</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>javax.validation</groupId>
<artifactId>validation-api</artifactId>
<version>2.0.1.Final</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>mockwebserver</artifactId>
<version>3.10.0</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
"""
)
_expected_new_directories_first_visit = [
"6c9de41e4cebb91a8368da1d89ae9873bd540ec3",
"c1a2ee97fc47426d0179f94d223405336b5cd075",
"9e1bdca292765a9528af18743bd793b80362c768",
"193a7af634592ef27fb341762806f61e8fb8eab3",
"a297aa21e3dbf138b370be3aae7a852dd403bbbb",
"da84026119ae04022f007d5b3362e98d46d09045",
"75bb915942a9c441ca62aeffc3b634f1ec9ce5e2",
"0851d359283b2ad82b116c8d1b55ab14b1ec219c",
"2bcbb8b723a025ee9a36b719cea229ed38c37e46",
]
_expected_new_release_first_visit = "02e83c29ec094db581f939d2e238d0613a4f59ac"
REL_MSG = (
b"Synthetic release for archive at https://repo1.maven.org/maven2/al/aldi/"
b"sprova4j/0.1.0/sprova4j-0.1.0-sources.jar\n"
)
REVISION_DATE = TimestampWithTimezone(
timestamp=Timestamp(seconds=1626116819, microseconds=335000),
offset=0,
negative_utc=False,
)
@pytest.fixture
def data_jar_1(datadir):
content = Path(
datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar"
).read_bytes()
return content
@pytest.fixture
def data_pom_1(datadir):
content = Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_bytes()
return content
@pytest.fixture
def data_jar_2(datadir):
content = Path(
datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar"
).read_bytes()
return content
@pytest.fixture
def data_pom_2(datadir):
content = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_bytes()
return content
def test_jar_visit_with_no_artifact_found(swh_storage, requests_mock_datadir):
unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz"
loader = MavenLoader(
swh_storage,
unknown_artifact_url,
artifacts=[
{
"time": "2021-07-18 08:05:05.187000",
"url": unknown_artifact_url, # unknown artifact
"filename": "8sync-0.1.0.tar.gz",
"gid": "al/aldi",
"aid": "sprova4j",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
}
],
)
actual_load_status = loader.load()
assert actual_load_status["status"] == "uneventful"
assert actual_load_status["snapshot_id"] is not None
expected_snapshot_id = "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"
assert actual_load_status["snapshot_id"] == expected_snapshot_id
stats = get_stats(swh_storage)
assert_last_visit_matches(
swh_storage, unknown_artifact_url, status="partial", type="maven"
)
assert {
"content": 0,
"directory": 0,
"origin": 1,
"origin_visit": 1,
"release": 0,
"revision": 0,
"skipped_content": 0,
"snapshot": 1,
} == stats
+def test_jar_visit_inconsistent_base_url(
+ swh_storage, requests_mock, data_jar_1, data_pom_1
+):
+ """Artifacts with differing base_url values make the loader constructor fail
+
+ """
+ with pytest.raises(ValueError, match="more than one Maven instance"):
+ MavenLoader(
+ swh_storage,
+ MVN_ARTIFACTS[0]["url"],
+ artifacts=[
+ MVN_ARTIFACTS[0],
+ {**MVN_ARTIFACTS[1], "base_url": "http://maven.example/"},
+ ],
+ )
+
+
def test_jar_visit_with_release_artifact_no_prior_visit(
swh_storage, requests_mock, data_jar_1, data_pom_1
):
"""With no prior visit, loading a jar ends up with 1 snapshot
"""
requests_mock.get(MVN_ARTIFACTS[0]["url"], content=data_jar_1)
requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1)
loader = MavenLoader(
swh_storage, MVN_ARTIFACTS[0]["url"], artifacts=[MVN_ARTIFACTS[0]]
)
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
expected_snapshot_first_visit_id = hash_to_bytes(
"c5195b8ebd148649bf094561877964b131ab27e0"
)
expected_snapshot = Snapshot(
id=expected_snapshot_first_visit_id,
branches={
b"HEAD": SnapshotBranch(
target_type=TargetType.ALIAS, target=b"releases/0.1.0",
),
b"releases/0.1.0": SnapshotBranch(
target_type=TargetType.RELEASE,
target=hash_to_bytes(_expected_new_release_first_visit),
),
},
)
actual_snapshot = snapshot_get_all_branches(
swh_storage, hash_to_bytes(actual_load_status["snapshot_id"])
)
assert actual_snapshot == expected_snapshot
check_snapshot(expected_snapshot, swh_storage)
assert (
hash_to_bytes(actual_load_status["snapshot_id"])
== expected_snapshot_first_visit_id
)
stats = get_stats(swh_storage)
assert_last_visit_matches(
swh_storage, MVN_ARTIFACTS[0]["url"], status="full", type="maven"
)
expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit)
assert list(swh_storage.content_missing_per_sha1(expected_contents)) == []
expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit)
assert list(swh_storage.directory_missing(expected_dirs)) == []
expected_rels = map(hash_to_bytes, {_expected_new_release_first_visit})
assert list(swh_storage.release_missing(expected_rels)) == []
rel_id = actual_snapshot.branches[b"releases/0.1.0"].target
(rel,) = swh_storage.release_get([rel_id])
assert rel == Release(
id=hash_to_bytes(_expected_new_release_first_visit),
name=b"0.1.0",
message=REL_MSG,
author=EMPTY_AUTHOR,
date=REVISION_DATE,
target_type=ModelObjectType.DIRECTORY,
target=hash_to_bytes("6c9de41e4cebb91a8368da1d89ae9873bd540ec3"),
synthetic=True,
metadata=None,
)
assert {
"content": len(_expected_new_contents_first_visit),
"directory": len(_expected_new_directories_first_visit),
"origin": 1,
"origin_visit": 1,
"release": 1,
"revision": 0,
"skipped_content": 0,
"snapshot": 1,
} == stats
def test_jar_2_visits_without_change(
swh_storage, requests_mock_datadir, requests_mock, data_jar_2, data_pom_2
):
"""A second visit of an unchanged jar is uneventful and keeps the same snapshot
"""
requests_mock.get(MVN_ARTIFACTS[1]["url"], content=data_jar_2)
requests_mock.get(MVN_ARTIFACTS_POM[1], content=data_pom_2)
loader = MavenLoader(
swh_storage, MVN_ARTIFACTS[1]["url"], artifacts=[MVN_ARTIFACTS[1]]
)
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
expected_snapshot_first_visit_id = hash_to_bytes(
"91dcacee7a6d2b54f9cab14bc14cb86d22d2ac2b"
)
assert (
hash_to_bytes(actual_load_status["snapshot_id"])
== expected_snapshot_first_visit_id
)
assert_last_visit_matches(
swh_storage, MVN_ARTIFACTS[1]["url"], status="full", type="maven"
)
actual_load_status2 = loader.load()
assert actual_load_status2["status"] == "uneventful"
assert actual_load_status2["snapshot_id"] is not None
assert actual_load_status["snapshot_id"] == actual_load_status2["snapshot_id"]
assert_last_visit_matches(
swh_storage, MVN_ARTIFACTS[1]["url"], status="full", type="maven"
)
# Make sure we have only one entry in history for the pom fetch, one for
# the actual download of jar, and that they're correct.
urls_history = [str(req.url) for req in list(requests_mock_datadir.request_history)]
assert urls_history == [
MVN_ARTIFACTS[1]["url"],
MVN_ARTIFACTS_POM[1],
]
def test_metadatata(swh_storage, requests_mock, data_jar_1, data_pom_1):
"""With no prior visit, loading a jar ends up with 1 snapshot.
Extrinsic metadata is the pom file associated to the source jar.
"""
requests_mock.get(MVN_ARTIFACTS[0]["url"], content=data_jar_1)
requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1)
loader = MavenLoader(
swh_storage, MVN_ARTIFACTS[0]["url"], artifacts=[MVN_ARTIFACTS[0]]
)
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
expected_release_id = hash_to_bytes(_expected_new_release_first_visit)
release = swh_storage.release_get([expected_release_id])[0]
assert release is not None
release_swhid = CoreSWHID(
object_type=ObjectType.RELEASE, object_id=expected_release_id
)
directory_swhid = ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
)
metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/",
+ type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/maven2/",
)
expected_metadata = [
RawExtrinsicMetadata(
target=directory_swhid,
authority=metadata_authority,
fetcher=MetadataFetcher(
name="swh.loader.package.maven.loader.MavenLoader", version=__version__,
),
discovery_date=loader.visit_date,
format="maven-pom",
metadata=_expected_pom_metadata.encode(),
origin=MVN_ARTIFACTS[0]["url"],
release=release_swhid,
),
RawExtrinsicMetadata(
target=directory_swhid,
authority=metadata_authority,
fetcher=MetadataFetcher(
name="swh.loader.package.maven.loader.MavenLoader", version=__version__,
),
discovery_date=loader.visit_date,
format="maven-json",
metadata=json.dumps(_expected_json_metadata).encode(),
origin=MVN_ARTIFACTS[0]["url"],
release=release_swhid,
),
]
res = swh_storage.raw_extrinsic_metadata_get(directory_swhid, metadata_authority)
assert res.next_page_token is None
assert set(res.results) == set(expected_metadata)
def test_metadatata_no_pom(swh_storage, requests_mock, data_jar_1):
"""With no prior visit, loading a jar ends up with 1 snapshot.
The maven-pom extrinsic metadata is empty if the pom file cannot be retrieved.
"""
requests_mock.get(MVN_ARTIFACTS[0]["url"], content=data_jar_1)
requests_mock.get(MVN_ARTIFACTS_POM[0], status_code="404")
loader = MavenLoader(
swh_storage, MVN_ARTIFACTS[0]["url"], artifacts=[MVN_ARTIFACTS[0]]
)
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
expected_release_id = hash_to_bytes(_expected_new_release_first_visit)
release = swh_storage.release_get([expected_release_id])[0]
assert release is not None
release_swhid = CoreSWHID(
object_type=ObjectType.RELEASE, object_id=expected_release_id
)
directory_swhid = ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
)
metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/",
+ type=MetadataAuthorityType.FORGE, url="https://repo1.maven.org/maven2/",
)
expected_metadata = [
RawExtrinsicMetadata(
target=directory_swhid,
authority=metadata_authority,
fetcher=MetadataFetcher(
name="swh.loader.package.maven.loader.MavenLoader", version=__version__,
),
discovery_date=loader.visit_date,
format="maven-pom",
metadata=b"",
origin=MVN_ARTIFACTS[0]["url"],
release=release_swhid,
),
RawExtrinsicMetadata(
target=directory_swhid,
authority=metadata_authority,
fetcher=MetadataFetcher(
name="swh.loader.package.maven.loader.MavenLoader", version=__version__,
),
discovery_date=loader.visit_date,
format="maven-json",
metadata=json.dumps(_expected_json_metadata).encode(),
origin=MVN_ARTIFACTS[0]["url"],
release=release_swhid,
),
]
res = swh_storage.raw_extrinsic_metadata_get(directory_swhid, metadata_authority)
assert res.next_page_token is None
assert set(res.results) == set(expected_metadata)
def test_jar_extid():
"""Compute primary key should return the right identity
"""
metadata = MVN_ARTIFACTS[0]
p_info = MavenPackageInfo(**metadata)
expected_manifest = "{gid} {aid} {version} {url} {time}".format(**metadata).encode()
actual_id = p_info.extid()
assert actual_id == ("maven-jar", 0, hashlib.sha256(expected_manifest).digest(),)
def test_jar_snapshot_append(
swh_storage,
requests_mock_datadir,
requests_mock,
data_jar_1,
data_pom_1,
data_jar_2,
data_pom_2,
):
# first loading with a first artifact
artifact1 = MVN_ARTIFACTS[0]
url1 = artifact1["url"]
requests_mock.get(url1, content=data_jar_1)
requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1)
loader = MavenLoader(swh_storage, url1, [artifact1])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] is not None
assert_last_visit_matches(swh_storage, url1, status="full", type="maven")
# check expected snapshot
snapshot = loader.last_snapshot()
assert len(snapshot.branches) == 2
branch_artifact1_name = f"releases/{artifact1['version']}".encode()
assert b"HEAD" in snapshot.branches
assert branch_artifact1_name in snapshot.branches
assert snapshot.branches[b"HEAD"].target == branch_artifact1_name
# second loading with a second artifact
artifact2 = MVN_ARTIFACTS[1]
url2 = artifact2["url"]
requests_mock.get(url2, content=data_jar_2)
requests_mock.get(MVN_ARTIFACTS_POM[1], content=data_pom_2)
loader = MavenLoader(swh_storage, url2, [artifact2])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] is not None
assert_last_visit_matches(swh_storage, url2, status="full", type="maven")
# check expected snapshot, should contain a new branch but not the
# branch for the first artifact
snapshot = loader.last_snapshot()
assert len(snapshot.branches) == 2
branch_artifact2_name = f"releases/{artifact2['version']}".encode()
assert b"HEAD" in snapshot.branches
assert branch_artifact2_name in snapshot.branches
assert branch_artifact1_name not in snapshot.branches
assert snapshot.branches[b"HEAD"].target == branch_artifact2_name
diff --git a/swh/loader/package/maven/tests/test_tasks.py b/swh/loader/package/maven/tests/test_tasks.py
index 1721219..2335af6 100644
--- a/swh/loader/package/maven/tests/test_tasks.py
+++ b/swh/loader/package/maven/tests/test_tasks.py
@@ -1,50 +1,51 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
MVN_ARTIFACTS = [
{
"time": 1626109619335,
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.0/"
+ "sprova4j-0.1.0.jar",
"gid": "al.aldi",
"aid": "sprova4j",
"filename": "sprova4j-0.1.0.jar",
"version": "0.1.0",
+ "base_url": "https://repo1.maven.org/maven2/",
},
]
-def test_tasks_jar_loader(
+def test_tasks_maven_loader(
mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
):
mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load")
mock_load.return_value = {"status": "eventful"}
res = swh_scheduler_celery_app.send_task(
"swh.loader.package.maven.tasks.LoadMaven",
kwargs=dict(url=MVN_ARTIFACTS[0]["url"], artifacts=MVN_ARTIFACTS,),
)
assert res
res.wait()
assert res.successful()
assert mock_load.called
assert res.result == {"status": "eventful"}
-def test_tasks_jar_loader_snapshot_append(
+def test_tasks_maven_loader_snapshot_append(
mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
):
mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load")
mock_load.return_value = {"status": "eventful"}
res = swh_scheduler_celery_app.send_task(
"swh.loader.package.maven.tasks.LoadMaven",
kwargs=dict(url=MVN_ARTIFACTS[0]["url"], artifacts=[]),
)
assert res
res.wait()
assert res.successful()
assert mock_load.called
assert res.result == {"status": "eventful"}

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 11:09 AM (3 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3247377

Event Timeline