Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0-sources.jar b/swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0-sources.jar
similarity index 100%
rename from swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0-sources.jar
rename to swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0-sources.jar
diff --git a/swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0-sources.jar.sha1 b/swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0-sources.jar.sha1
similarity index 100%
rename from swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0-sources.jar.sha1
rename to swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0-sources.jar.sha1
diff --git a/swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0.pom b/swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0.pom
similarity index 100%
rename from swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0.pom
rename to swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.0_sprova4j-0.1.0.pom
diff --git a/swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1-sources.jar b/swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1-sources.jar
similarity index 100%
rename from swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1-sources.jar
rename to swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1-sources.jar
diff --git a/swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1-sources.jar.sha1 b/swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1-sources.jar.sha1
similarity index 100%
rename from swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1-sources.jar.sha1
rename to swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1-sources.jar.sha1
diff --git a/swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1.pom b/swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1.pom
similarity index 100%
rename from swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1.pom
rename to swh/loader/package/maven/tests/data/https_repo1.maven.org/maven2_al_aldi_sprova4j_0.1.1_sprova4j-0.1.1.pom
diff --git a/swh/loader/package/maven/tests/test_maven.py b/swh/loader/package/maven/tests/test_maven.py
index 5bc6db6..acf88fb 100644
--- a/swh/loader/package/maven/tests/test_maven.py
+++ b/swh/loader/package/maven/tests/test_maven.py
@@ -1,499 +1,456 @@
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import hashlib
from itertools import chain
import json
import os
-from pathlib import Path
import pytest
+import requests
from swh.core.tarball import uncompress
from swh.loader.package import __version__
from swh.loader.package.maven.loader import MavenLoader, MavenPackageInfo
from swh.loader.package.utils import EMPTY_AUTHOR
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.from_disk import Directory, iter_directory
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
RawExtrinsicMetadata,
Release,
Snapshot,
SnapshotBranch,
TargetType,
TimestampWithTimezone,
)
from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher
from swh.model.model import ObjectType as ModelObjectType
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType
from swh.storage.algos.snapshot import snapshot_get_all_branches
# Base URL of the Maven repository every test artifact below lives under.
REPO_BASE_URL = "https://repo1.maven.org/maven2/"
# Origin URL handed to MavenLoader by the tests of this module.
MVN_ORIGIN_URL = f"{REPO_BASE_URL}al/aldi/sprova4j"
# Artifact descriptions passed as ``artifacts=`` to MavenLoader.  Each dict is
# also serialized with json.dumps() to build the expected "maven-json"
# metadata, so keep the keys and their order untouched.
MVN_ARTIFACTS = [
{
"time": "2021-07-12 19:06:59.335000",
"gid": "al.aldi",
"aid": "sprova4j",
"filename": "sprova4j-0.1.0-sources.jar",
"version": "0.1.0",
"base_url": REPO_BASE_URL,
"url": f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.0/sprova4j-0.1.0-sources.jar",
},
{
"time": "2021-07-12 19:37:05.534000",
"gid": "al.aldi",
"aid": "sprova4j",
"filename": "sprova4j-0.1.1-sources.jar",
"version": "0.1.1",
"base_url": REPO_BASE_URL,
"url": f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.1/sprova4j-0.1.1-sources.jar",
},
]
# URLs of the pom files, listed in the same order as MVN_ARTIFACTS.
MVN_ARTIFACTS_POM = [
f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom",
f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom",
]
# Expected release messages, indexed like MVN_ARTIFACTS (each embeds the
# corresponding artifact URL).
REL_MSGS = (
b"Synthetic release for archive at https://repo1.maven.org/maven2/al/aldi/"
b"sprova4j/0.1.0/sprova4j-0.1.0-sources.jar\n",
b"Synthetic release for archive at https://repo1.maven.org/maven2/al/aldi/"
b"sprova4j/0.1.1/sprova4j-0.1.1-sources.jar\n",
)
# Expected release dates, indexed like MVN_ARTIFACTS: the same instants as the
# artifacts' "time" fields, expressed as UTC timestamps.
REL_DATES = (
TimestampWithTimezone.from_datetime(
datetime.datetime(2021, 7, 12, 19, 6, 59, 335000, tzinfo=datetime.timezone.utc)
),
TimestampWithTimezone.from_datetime(
datetime.datetime(2021, 7, 12, 19, 37, 5, 534000, tzinfo=datetime.timezone.utc)
),
)
-@pytest.fixture
-def data_jar_1(datadir):
- content = Path(
- datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar"
- ).read_bytes()
- return content
-
-
-@pytest.fixture
-def data_jar_1_sha1(datadir):
- content = Path(
- datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar.sha1"
- ).read_bytes()
- return content
-
-
-@pytest.fixture
-def data_pom_1(datadir):
- content = Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_bytes()
- return content
-
-
-@pytest.fixture
-def data_jar_2(datadir):
- content = Path(
- datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar"
- ).read_bytes()
- return content
-
-
-@pytest.fixture
-def data_jar_2_sha1(datadir):
- content = Path(
- datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar.sha1"
- ).read_bytes()
- return content
@pytest.fixture(autouse=True)
def network_requests_mock(requests_mock_datadir):
    """Request the ``requests_mock_datadir`` fixture for every test here.

    The fixture has no logic of its own: being ``autouse`` it only makes
    sure ``requests_mock_datadir`` is set up before each test runs.
    """
    return None
@pytest.fixture
-def data_pom_2(datadir):
- content = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_bytes()
- return content
-
def jar_dirs(tmp_path):
    """Fetch both test jars and return their extracted trees.

    Each of the two ``MVN_ARTIFACTS`` jars is downloaded with ``requests``,
    saved under ``tmp_path``, uncompressed into ``jar_1`` / ``jar_2`` and
    loaded as a ``Directory``.  The returned list follows the order of
    ``MVN_ARTIFACTS``.
    """
    artifacts = (MVN_ARTIFACTS[0], MVN_ARTIFACTS[1])

    # Download every archive first so requests are issued in artifact order.
    archive_paths = []
    for artifact in artifacts:
        archive_path = os.path.join(tmp_path, os.path.basename(artifact["url"]))
        payload = requests.get(artifact["url"]).content
        with open(archive_path, "wb") as archive:
            archive.write(payload)
        archive_paths.append(archive_path)

    # Then unpack each archive into its own directory and load the tree.
    directories = []
    for index, archive_path in enumerate(archive_paths, start=1):
        extract_path = os.path.join(tmp_path, f"jar_{index}")
        uncompress(archive_path, extract_path)
        directories.append(Directory.from_disk(path=extract_path.encode()))
    return directories
@pytest.fixture
def expected_contents_and_directories(jar_dirs):
    """Return ``(content sha1s, directory ids)`` gathered from both jars."""
    content_sha1s = set()
    directory_ids = set()
    for jar_dir in (jar_dirs[0], jar_dirs[1]):
        # The middle element of the triple is not needed here.
        jar_contents, _, jar_directories = iter_directory(jar_dir)
        content_sha1s.update(content.sha1 for content in jar_contents)
        directory_ids.update(directory.id for directory in jar_directories)
    return content_sha1s, directory_ids
@pytest.fixture
def expected_releases(jar_dirs):
    """Return the two synthetic releases expected for the test jars.

    Release ``i`` targets the directory hash of ``jar_dirs[i]`` and carries
    the matching entries of ``REL_MSGS`` and ``REL_DATES``.
    """
    release_names = (b"0.1.0", b"0.1.1")
    release_specs = zip(release_names, REL_MSGS, REL_DATES, jar_dirs)
    return [
        Release(
            name=release_name,
            message=message,
            author=EMPTY_AUTHOR,
            date=date,
            target_type=ModelObjectType.DIRECTORY,
            target=jar_dir.hash,
            synthetic=True,
            metadata=None,
        )
        for release_name, message, date, jar_dir in release_specs
    ]
@pytest.fixture
def expected_snapshot(expected_releases):
    """Return the snapshot expected after loading both jars.

    It holds one release branch per version plus a ``HEAD`` alias pointing
    at ``releases/0.1.1``.
    """
    head_alias = SnapshotBranch(
        target_type=TargetType.ALIAS,
        target=b"releases/0.1.1",
    )
    branch_0_1_0 = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=expected_releases[0].id,
    )
    branch_0_1_1 = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=expected_releases[1].id,
    )
    branches = {
        b"HEAD": head_alias,
        b"releases/0.1.0": branch_0_1_0,
        b"releases/0.1.1": branch_0_1_1,
    }
    return Snapshot(branches=branches)
@pytest.fixture
def expected_json_metadata():
    """Return the artifact dicts used to build the expected JSON metadata."""
    artifacts = MVN_ARTIFACTS
    return artifacts
@pytest.fixture
-def expected_pom_metadata(data_pom_1, data_pom_2):
- return [data_pom_1, data_pom_2]
-
-
-@pytest.fixture(autouse=True)
-def network_requests_mock(
- requests_mock,
- data_jar_1,
- data_jar_1_sha1,
- data_pom_1,
- data_jar_2,
- data_jar_2_sha1,
- data_pom_2,
-):
- requests_mock.get(MVN_ARTIFACTS[0]["url"], content=data_jar_1)
- requests_mock.get(MVN_ARTIFACTS[0]["url"] + ".sha1", content=data_jar_1_sha1)
- requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1)
- requests_mock.get(MVN_ARTIFACTS[1]["url"], content=data_jar_2)
- requests_mock.get(MVN_ARTIFACTS[1]["url"] + ".sha1", content=data_jar_2_sha1)
- requests_mock.get(MVN_ARTIFACTS_POM[1], content=data_pom_2)
def expected_pom_metadata():
    """Return the body of each pom file, in ``MVN_ARTIFACTS_POM`` order."""
    responses = (requests.get(pom_url) for pom_url in MVN_ARTIFACTS_POM)
    return [response.content for response in responses]
-def test_maven_loader_visit_with_no_artifact_found(swh_storage, requests_mock_datadir):
def test_maven_loader_visit_with_no_artifact_found(swh_storage):
    """A visit whose only artifact is unknown is uneventful and partial.

    A snapshot is still recorded, but no content, directory or release.
    """
    origin_url = "https://ftp.g.o/unknown"
    unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz"
    unknown_artifact = {
        "time": "2021-07-18 08:05:05.187000",
        "url": unknown_artifact_url,  # unknown artifact
        "filename": "8sync-0.1.0.tar.gz",
        "gid": "al/aldi",
        "aid": "sprova4j",
        "version": "0.1.0",
        "base_url": "https://repo1.maven.org/maven2/",
    }
    loader = MavenLoader(swh_storage, origin_url, artifacts=[unknown_artifact])

    load_status = loader.load()

    assert load_status["status"] == "uneventful"
    assert load_status["snapshot_id"] is not None
    expected_snapshot_id = "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"
    assert load_status["snapshot_id"] == expected_snapshot_id

    stats = get_stats(swh_storage)
    assert_last_visit_matches(swh_storage, origin_url, status="partial", type="maven")

    expected_stats = {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert expected_stats == stats
-def test_maven_loader_jar_visit_inconsistent_base_url(
- swh_storage, requests_mock, data_jar_1, data_pom_1
-):
def test_maven_loader_jar_visit_inconsistent_base_url(swh_storage):
    """Artifacts with differing ``base_url`` values are rejected.

    Building a ``MavenLoader`` whose artifacts do not all share the same
    ``base_url`` must raise a ``ValueError`` mentioning "more than one Maven
    instance".
    """
    with pytest.raises(ValueError, match="more than one Maven instance"):
        MavenLoader(
            swh_storage,
            MVN_ORIGIN_URL,
            artifacts=[
                MVN_ARTIFACTS[0],
                # Same artifact data, but pretending to come from elsewhere.
                {**MVN_ARTIFACTS[1], "base_url": "http://maven.example/"},
            ],
        )
def test_maven_loader_first_visit(
    swh_storage,
    expected_contents_and_directories,
    expected_snapshot,
    expected_releases,
):
    """A first visit is eventful and archives both jars under one snapshot."""
    expected_contents, expected_directories = expected_contents_and_directories

    loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS)
    load_status = loader.load()
    assert load_status["status"] == "eventful"

    snapshot_id = load_status["snapshot_id"]
    actual_snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id))
    assert snapshot_id == expected_snapshot.id.hex()
    check_snapshot(expected_snapshot, swh_storage)

    stats = get_stats(swh_storage)
    assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven")

    # Every expected content and directory must now be in the storage.
    assert list(swh_storage.content_missing_per_sha1(expected_contents)) == []
    assert list(swh_storage.directory_missing(expected_directories)) == []

    release_ids = [
        actual_snapshot.branches[branch_name].target
        for branch_name in (b"releases/0.1.0", b"releases/0.1.1")
    ]
    assert swh_storage.release_get(release_ids) == expected_releases

    expected_stats = {
        "content": len(expected_contents),
        "directory": len(expected_directories),
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert expected_stats == stats
def test_maven_loader_2_visits_without_change(
    swh_storage, requests_mock, expected_snapshot
):
    """A second visit of an unchanged origin is uneventful.

    The first visit is eventful; the second one must yield the same snapshot
    and only re-request the ``.sha1`` files, without downloading the jars or
    the pom files again.
    """
    # reset requests history as some are sent by fixtures
    requests_mock.reset_mock()

    loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS)

    actual_load_status = loader.load()
    assert actual_load_status["status"] == "eventful"
    assert actual_load_status["snapshot_id"] == expected_snapshot.id.hex()
    assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven")

    actual_load_status2 = loader.load()
    assert actual_load_status2["status"] == "uneventful"
    assert actual_load_status2["snapshot_id"] is not None
    assert actual_load_status["snapshot_id"] == actual_load_status2["snapshot_id"]
    assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven")

    # First visit: both .sha1 files, then each jar followed by its pom.
    # Second visit: only the .sha1 files are requested again.
    urls_history = [str(req.url) for req in requests_mock.request_history]
    assert urls_history == [
        MVN_ARTIFACTS[0]["url"] + ".sha1",
        MVN_ARTIFACTS[1]["url"] + ".sha1",
        MVN_ARTIFACTS[0]["url"],
        MVN_ARTIFACTS_POM[0],
        MVN_ARTIFACTS[1]["url"],
        MVN_ARTIFACTS_POM[1],
        MVN_ARTIFACTS[0]["url"] + ".sha1",
        MVN_ARTIFACTS[1]["url"] + ".sha1",
    ]
def test_maven_loader_extrinsic_metadata(
    swh_storage,
    expected_releases,
    expected_json_metadata,
    expected_pom_metadata,
):
    """Each loaded release gets its pom and JSON extrinsic metadata.

    For every expected release, the storage must hold exactly two metadata
    objects on the release's target directory: the pom file content
    (``maven-pom``) and the serialized artifact description (``maven-json``).
    """
    loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS)
    assert loader.load()["status"] == "eventful"

    # Identical for every release, so built once.
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url=REPO_BASE_URL,
    )
    metadata_fetcher = MetadataFetcher(
        name="swh.loader.package.maven.loader.MavenLoader",
        version=__version__,
    )

    per_release = zip(expected_releases, expected_pom_metadata, expected_json_metadata)
    for expected_release, pom_metadata, json_metadata in per_release:
        expected_release_id = expected_release.id
        release = swh_storage.release_get([expected_release_id])[0]
        assert release is not None

        release_swhid = CoreSWHID(
            object_type=ObjectType.RELEASE, object_id=expected_release_id
        )
        directory_swhid = ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
        )

        expected_by_format = (
            ("maven-pom", pom_metadata),
            ("maven-json", json.dumps(json_metadata).encode()),
        )
        expected_metadata = [
            RawExtrinsicMetadata(
                target=directory_swhid,
                authority=metadata_authority,
                fetcher=metadata_fetcher,
                discovery_date=loader.visit_date,
                format=metadata_format,
                metadata=raw_metadata,
                origin=MVN_ORIGIN_URL,
                release=release_swhid,
            )
            for metadata_format, raw_metadata in expected_by_format
        ]

        res = swh_storage.raw_extrinsic_metadata_get(
            directory_swhid, metadata_authority
        )
        assert res.next_page_token is None
        assert set(res.results) == set(expected_metadata)
def test_maven_loader_extrinsic_metadata_no_pom(
    swh_storage,
    requests_mock,
    expected_releases,
    expected_json_metadata,
):
    """A missing pom file yields empty ``maven-pom`` metadata.

    The pom of the first artifact answers 404; the load must still be
    eventful, and the ``maven-pom`` metadata stored for that release must be
    empty bytes while the ``maven-json`` metadata is unaffected.
    """
    # HTTP status codes are integers; override the pom served for 0.1.0.
    requests_mock.get(MVN_ARTIFACTS_POM[0], status_code=404)

    loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS)
    actual_load_status = loader.load()
    assert actual_load_status["status"] == "eventful"

    expected_release_id = expected_releases[0].id
    release = swh_storage.release_get([expected_release_id])[0]
    assert release is not None

    release_swhid = CoreSWHID(
        object_type=ObjectType.RELEASE, object_id=expected_release_id
    )
    directory_swhid = ExtendedSWHID(
        object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
    )
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url=REPO_BASE_URL,
    )

    expected_metadata = [
        RawExtrinsicMetadata(
            target=directory_swhid,
            authority=metadata_authority,
            fetcher=MetadataFetcher(
                name="swh.loader.package.maven.loader.MavenLoader",
                version=__version__,
            ),
            discovery_date=loader.visit_date,
            format="maven-pom",
            # No pom could be fetched, so its metadata is empty.
            metadata=b"",
            origin=MVN_ORIGIN_URL,
            release=release_swhid,
        ),
        RawExtrinsicMetadata(
            target=directory_swhid,
            authority=metadata_authority,
            fetcher=MetadataFetcher(
                name="swh.loader.package.maven.loader.MavenLoader",
                version=__version__,
            ),
            discovery_date=loader.visit_date,
            format="maven-json",
            metadata=json.dumps(expected_json_metadata[0]).encode(),
            origin=MVN_ORIGIN_URL,
            release=release_swhid,
        ),
    ]

    res = swh_storage.raw_extrinsic_metadata_get(directory_swhid, metadata_authority)
    assert res.next_page_token is None
    assert set(res.results) == set(expected_metadata)
def test_maven_loader_jar_extid():
    """The extid is ``("maven-jar", 0, sha256 of the artifact manifest)``.

    The manifest is the space-separated gid, aid, version, url and time of
    the artifact.
    """
    artifact = MVN_ARTIFACTS[0]
    manifest = "{gid} {aid} {version} {url} {time}".format(**artifact).encode()
    expected_extid = ("maven-jar", 0, hashlib.sha256(manifest).digest())

    package_info = MavenPackageInfo(**artifact)

    assert package_info.extid() == expected_extid

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 12:04 PM (2 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3266304

Event Timeline