Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/maven/tests/test_lister.py
- This file was added.
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from pathlib import Path | |||||
import pytest | |||||
import requests | |||||
from swh.lister.maven.lister import MavenLister | |||||
MVN_URL = "https://repo1.maven.org/maven2/"  # main maven repo url
INDEX_URL = "https://indexes/export.fld"  # index directory url

# POM files of the two sprova4j releases; the tests mock these URLs.
URL_POM_1 = MVN_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom"
URL_POM_2 = MVN_URL + "al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom"

# Git origin URLs expected among the listed origins.
LIST_GIT = (
    "git://github.com/aldialimucaj/sprova4j.git",
    "https://github.com/aldialimucaj/sprova4j.git",
)

# Source-jar origin URLs expected among the listed origins.
LIST_SRC = (
    MVN_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0-sources.jar",
    MVN_URL + "al/aldi/sprova4j/0.1.1/sprova4j-0.1.1-sources.jar",
)

# Artifact metadata expected on each "jar" origin, matched by its "url".
LIST_SRC_DATA = (
    {
        "type": "jar",
        "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
        + "/0.1.0/sprova4j-0.1.0-sources.jar",
        "time": 1626109619335,
        "gid": "al.aldi",
        "aid": "sprova4j",
        "version": "0.1.0",
    },
    {
        "type": "jar",
        "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j"
        + "/0.1.1/sprova4j-0.1.1-sources.jar",
        "time": 1626111425534,
        "gid": "al.aldi",
        "aid": "sprova4j",
        "version": "0.1.1",
    },
)
@pytest.fixture
def maven_index(datadir) -> str:
    """Return the text of ``https_indexes/export.fld`` found under ``datadir``."""
    # Decode explicitly so the result does not depend on the locale's default
    # encoding (assumes the recorded file is UTF-8).
    return Path(datadir, "https_indexes", "export.fld").read_text(encoding="utf-8")
@pytest.fixture
def maven_pom_1(datadir) -> str:
    """Return the text of ``https_maven.org/sprova4j-0.1.0.pom`` under ``datadir``."""
    pom_path = Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom")
    # Decode explicitly so the result does not depend on the locale's default
    # encoding (assumes the recorded file is UTF-8).
    return pom_path.read_text(encoding="utf-8")
@pytest.fixture
def maven_pom_2(datadir) -> str:
    """Return the text of ``https_maven.org/sprova4j-0.1.1.pom`` under ``datadir``."""
    pom_path = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom")
    # Decode explicitly so the result does not depend on the locale's default
    # encoding (assumes the recorded file is UTF-8).
    return pom_path.read_text(encoding="utf-8")
def test_maven_full_listing(
    swh_scheduler, requests_mock, mocker, maven_index, maven_pom_1, maven_pom_2,
):
    """Covers full listing of multiple pages, checking page results and listed
    origins, statelessness."""
    lister = MavenLister(
        scheduler=swh_scheduler,
        url=MVN_URL,
        instance="maven.org",
        index_url=INDEX_URL,
    )

    # Set up test: serve the recorded index and both POM files.
    requests_mock.get(INDEX_URL, text=maven_index)
    requests_mock.get(URL_POM_1, text=maven_pom_1)
    requests_mock.get(URL_POM_2, text=maven_pom_2)

    # Then run the lister.
    stats = lister.run()

    # Start test checks.
    assert stats.pages == 4
    assert stats.origins == 4

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    origin_urls = [origin.url for origin in scheduler_origins]
    assert sorted(origin_urls) == sorted(LIST_GIT + LIST_SRC)

    # Every "jar" origin must match one LIST_SRC_DATA entry, and its first
    # artifact must carry that entry's metadata.
    expected_by_url = {src["url"]: src for src in LIST_SRC_DATA}
    for origin in scheduler_origins:
        if origin.visit_type != "jar":
            continue
        expected = expected_by_url.get(origin.url)
        assert expected is not None, f"unexpected jar origin: {origin.url}"
        artifact = origin.extra_loader_arguments["artifacts"][0]
        for key in ("time", "gid", "aid", "version"):
            assert expected[key] == artifact[key], (
                f"{origin.url}: artifact {key!r} is {artifact[key]!r}, "
                f"expected {expected[key]!r}"
            )

    # The lister is stateless: nothing must be stored in the scheduler.
    assert lister.get_state_from_scheduler() is None
@pytest.mark.parametrize("http_code", [400, 404, 500, 502])
def test_maven_list_http_error(
    swh_scheduler, requests_mock, mocker, maven_index, http_code
):
    """Test handling of some common HTTP errors:
    - 400: Bad request.
    - 404: Resource not found.
    - 500: Internal server error.
    - 502: Bad gateway or proxy error.
    """
    lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL)

    # Test failure of index retrieval.
    requests_mock.get(INDEX_URL, status_code=http_code)
    with pytest.raises(requests.HTTPError):
        lister.run()

    # Test failure of artefacts retrieval: the index is now served
    # correctly, but fetching the first POM fails.
    requests_mock.get(INDEX_URL, text=maven_index)
    requests_mock.get(URL_POM_1, status_code=http_code)
    with pytest.raises(requests.HTTPError):
        lister.run()

    # If the maven_index step succeeded but not the get_pom step,
    # then we get only the 2 jar origins (and not the 2 additional
    # origins, presumably the git ones taken from the POMs -- TODO confirm).
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(scheduler_origins) == 2