Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/maven/tests/test_maven.py
# Copyright (C) 2019-2022 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
from itertools import chain | from itertools import chain | ||||
import json | import json | ||||
import os | import os | ||||
from pathlib import Path | |||||
import pytest | import pytest | ||||
import requests | |||||
from swh.core.tarball import uncompress | from swh.core.tarball import uncompress | ||||
from swh.loader.package import __version__ | from swh.loader.package import __version__ | ||||
from swh.loader.package.maven.loader import MavenLoader, MavenPackageInfo | from swh.loader.package.maven.loader import MavenLoader, MavenPackageInfo | ||||
from swh.loader.package.utils import EMPTY_AUTHOR | from swh.loader.package.utils import EMPTY_AUTHOR | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.from_disk import Directory, iter_directory | from swh.model.from_disk import Directory, iter_directory | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | TimestampWithTimezone.from_datetime( | ||||
datetime.datetime(2021, 7, 12, 19, 6, 59, 335000, tzinfo=datetime.timezone.utc) | datetime.datetime(2021, 7, 12, 19, 6, 59, 335000, tzinfo=datetime.timezone.utc) | ||||
), | ), | ||||
TimestampWithTimezone.from_datetime( | TimestampWithTimezone.from_datetime( | ||||
datetime.datetime(2021, 7, 12, 19, 37, 5, 534000, tzinfo=datetime.timezone.utc) | datetime.datetime(2021, 7, 12, 19, 37, 5, 534000, tzinfo=datetime.timezone.utc) | ||||
), | ), | ||||
) | ) | ||||
@pytest.fixture | @pytest.fixture(autouse=True) | ||||
def data_jar_1(datadir): | def network_requests_mock(requests_mock_datadir): | ||||
content = Path( | pass | ||||
datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar" | |||||
).read_bytes() | |||||
return content | |||||
@pytest.fixture | |||||
def data_jar_1_sha1(datadir): | |||||
content = Path( | |||||
datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar.sha1" | |||||
).read_bytes() | |||||
return content | |||||
@pytest.fixture | |||||
def data_pom_1(datadir): | |||||
content = Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_bytes() | |||||
return content | |||||
@pytest.fixture | |||||
def data_jar_2(datadir): | |||||
content = Path( | |||||
datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar" | |||||
).read_bytes() | |||||
return content | |||||
@pytest.fixture | |||||
def data_jar_2_sha1(datadir): | |||||
content = Path( | |||||
datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar.sha1" | |||||
).read_bytes() | |||||
return content | |||||
@pytest.fixture | @pytest.fixture | ||||
def data_pom_2(datadir): | def jar_dirs(tmp_path): | ||||
content = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_bytes() | jar_1_path = os.path.join(tmp_path, os.path.basename(MVN_ARTIFACTS[0]["url"])) | ||||
return content | jar_2_path = os.path.join(tmp_path, os.path.basename(MVN_ARTIFACTS[1]["url"])) | ||||
with open(jar_1_path, "wb") as jar_1, open(jar_2_path, "wb") as jar_2: | |||||
@pytest.fixture | jar_1.write(requests.get(MVN_ARTIFACTS[0]["url"]).content) | ||||
def jar_dirs(datadir, tmp_path): | jar_2.write(requests.get(MVN_ARTIFACTS[1]["url"]).content) | ||||
jar_1_path = os.path.join(datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar") | |||||
jar_2_path = os.path.join(datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar") | |||||
jar_1_extract_path = os.path.join(tmp_path, "jar_1") | jar_1_extract_path = os.path.join(tmp_path, "jar_1") | ||||
jar_2_extract_path = os.path.join(tmp_path, "jar_2") | jar_2_extract_path = os.path.join(tmp_path, "jar_2") | ||||
uncompress(jar_1_path, jar_1_extract_path) | uncompress(jar_1_path, jar_1_extract_path) | ||||
uncompress(jar_2_path, jar_2_extract_path) | uncompress(jar_2_path, jar_2_extract_path) | ||||
jar_1_dir = Directory.from_disk(path=jar_1_extract_path.encode()) | jar_1_dir = Directory.from_disk(path=jar_1_extract_path.encode()) | ||||
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | |||||
@pytest.fixture | @pytest.fixture | ||||
def expected_json_metadata(): | def expected_json_metadata(): | ||||
return MVN_ARTIFACTS | return MVN_ARTIFACTS | ||||
@pytest.fixture | @pytest.fixture | ||||
def expected_pom_metadata(data_pom_1, data_pom_2): | def expected_pom_metadata(): | ||||
return [data_pom_1, data_pom_2] | return [requests.get(pom_url).content for pom_url in MVN_ARTIFACTS_POM] | ||||
@pytest.fixture(autouse=True) | |||||
def network_requests_mock( | |||||
requests_mock, | |||||
data_jar_1, | |||||
data_jar_1_sha1, | |||||
data_pom_1, | |||||
data_jar_2, | |||||
data_jar_2_sha1, | |||||
data_pom_2, | |||||
): | |||||
requests_mock.get(MVN_ARTIFACTS[0]["url"], content=data_jar_1) | |||||
requests_mock.get(MVN_ARTIFACTS[0]["url"] + ".sha1", content=data_jar_1_sha1) | |||||
requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1) | |||||
requests_mock.get(MVN_ARTIFACTS[1]["url"], content=data_jar_2) | |||||
requests_mock.get(MVN_ARTIFACTS[1]["url"] + ".sha1", content=data_jar_2_sha1) | |||||
requests_mock.get(MVN_ARTIFACTS_POM[1], content=data_pom_2) | |||||
def test_maven_loader_visit_with_no_artifact_found(swh_storage, requests_mock_datadir): | def test_maven_loader_visit_with_no_artifact_found(swh_storage): | ||||
origin_url = "https://ftp.g.o/unknown" | origin_url = "https://ftp.g.o/unknown" | ||||
unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz" | unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz" | ||||
loader = MavenLoader( | loader = MavenLoader( | ||||
swh_storage, | swh_storage, | ||||
origin_url, | origin_url, | ||||
artifacts=[ | artifacts=[ | ||||
{ | { | ||||
"time": "2021-07-18 08:05:05.187000", | "time": "2021-07-18 08:05:05.187000", | ||||
Show All 25 Lines | assert { | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
def test_maven_loader_jar_visit_inconsistent_base_url( | def test_maven_loader_jar_visit_inconsistent_base_url(swh_storage): | ||||
swh_storage, requests_mock, data_jar_1, data_pom_1 | |||||
): | |||||
"""With no prior visit, loading a jar ends up with 1 snapshot""" | """With no prior visit, loading a jar ends up with 1 snapshot""" | ||||
with pytest.raises(ValueError, match="more than one Maven instance"): | with pytest.raises(ValueError, match="more than one Maven instance"): | ||||
MavenLoader( | MavenLoader( | ||||
swh_storage, | swh_storage, | ||||
MVN_ORIGIN_URL, | MVN_ORIGIN_URL, | ||||
artifacts=[ | artifacts=[ | ||||
MVN_ARTIFACTS[0], | MVN_ARTIFACTS[0], | ||||
{**MVN_ARTIFACTS[1], "base_url": "http://maven.example/"}, | {**MVN_ARTIFACTS[1], "base_url": "http://maven.example/"}, | ||||
], | ], | ||||
) | ) | ||||
def test_maven_loader_first_visit( | def test_maven_loader_first_visit( | ||||
swh_storage, expected_contents_and_directories, expected_snapshot, expected_releases | swh_storage, | ||||
expected_contents_and_directories, | |||||
expected_snapshot, | |||||
expected_releases, | |||||
): | ): | ||||
"""With no prior visit, loading a jar ends up with 1 snapshot""" | """With no prior visit, loading a jar ends up with 1 snapshot""" | ||||
loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
Show All 29 Lines | ): | ||||
} == stats | } == stats | ||||
def test_maven_loader_2_visits_without_change( | def test_maven_loader_2_visits_without_change( | ||||
swh_storage, requests_mock, expected_snapshot | swh_storage, requests_mock, expected_snapshot | ||||
): | ): | ||||
"""With no prior visit, load a maven project ends up with 1 snapshot""" | """With no prior visit, load a maven project ends up with 1 snapshot""" | ||||
# reset requests history as some are sent by fixtures | |||||
requests_mock.reset_mock() | |||||
loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] == expected_snapshot.id.hex() | assert actual_load_status["snapshot_id"] == expected_snapshot.id.hex() | ||||
assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven") | assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven") | ||||
Show All 16 Lines | assert urls_history == [ | ||||
MVN_ARTIFACTS[1]["url"], | MVN_ARTIFACTS[1]["url"], | ||||
MVN_ARTIFACTS_POM[1], | MVN_ARTIFACTS_POM[1], | ||||
MVN_ARTIFACTS[0]["url"] + ".sha1", | MVN_ARTIFACTS[0]["url"] + ".sha1", | ||||
MVN_ARTIFACTS[1]["url"] + ".sha1", | MVN_ARTIFACTS[1]["url"] + ".sha1", | ||||
] | ] | ||||
def test_maven_loader_extrinsic_metadata( | def test_maven_loader_extrinsic_metadata( | ||||
swh_storage, expected_releases, expected_json_metadata, expected_pom_metadata | swh_storage, | ||||
expected_releases, | |||||
expected_json_metadata, | |||||
expected_pom_metadata, | |||||
): | ): | ||||
"""With no prior visit, loading a jar ends up with 1 snapshot. | """With no prior visit, loading a jar ends up with 1 snapshot. | ||||
Extrinsic metadata is the pom file associated to the source jar. | Extrinsic metadata is the pom file associated to the source jar. | ||||
""" | """ | ||||
loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | for i, expected_release in enumerate(expected_releases): | ||||
res = swh_storage.raw_extrinsic_metadata_get( | res = swh_storage.raw_extrinsic_metadata_get( | ||||
directory_swhid, metadata_authority | directory_swhid, metadata_authority | ||||
) | ) | ||||
assert res.next_page_token is None | assert res.next_page_token is None | ||||
assert set(res.results) == set(expected_metadata) | assert set(res.results) == set(expected_metadata) | ||||
def test_maven_loader_extrinsic_metadata_no_pom( | def test_maven_loader_extrinsic_metadata_no_pom( | ||||
swh_storage, requests_mock, expected_releases, expected_json_metadata | swh_storage, | ||||
requests_mock, | |||||
expected_releases, | |||||
expected_json_metadata, | |||||
): | ): | ||||
"""With no prior visit, loading a jar ends up with 1 snapshot. | """With no prior visit, loading a jar ends up with 1 snapshot. | ||||
Extrinsic metadata is None if the pom file cannot be retrieved. | Extrinsic metadata is None if the pom file cannot be retrieved. | ||||
""" | """ | ||||
requests_mock.get(MVN_ARTIFACTS_POM[0], status_code="404") | requests_mock.get(MVN_ARTIFACTS_POM[0], status_code="404") | ||||
loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) | ||||
▲ Show 20 Lines • Show All 63 Lines • Show Last 20 Lines |