Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/maven/tests/test_lister.py
Show All 12 Lines | |||||
MVN_URL = "https://repo1.maven.org/maven2/" # main maven repo url | MVN_URL = "https://repo1.maven.org/maven2/" # main maven repo url | ||||
INDEX_URL = "http://indexes/export.fld" # index directory url | INDEX_URL = "http://indexes/export.fld" # index directory url | ||||
URL_POM_1 = MVN_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom" | URL_POM_1 = MVN_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom" | ||||
URL_POM_2 = MVN_URL + "al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom" | URL_POM_2 = MVN_URL + "al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom" | ||||
URL_POM_3 = MVN_URL + "com/arangodb/arangodb-graphql/1.2/arangodb-graphql-1.2.pom" | URL_POM_3 = MVN_URL + "com/arangodb/arangodb-graphql/1.2/arangodb-graphql-1.2.pom" | ||||
LIST_GIT = ( | |||||
"git://github.com/aldialimucaj/sprova4j.git", | |||||
"https://github.com/aldialimucaj/sprova4j.git", | |||||
) | |||||
LIST_GIT_INCR = ("git://github.com/ArangoDB-Community/arangodb-graphql-java.git",) | USER_REPO0 = "aldialimucaj/sprova4j" | ||||
GIT_REPO_URL0_HTTPS = f"https://github.com/{USER_REPO0}" | |||||
GIT_REPO_URL0_API = f"https://api.github.com/repos/{USER_REPO0}" | |||||
LIST_GIT = (GIT_REPO_URL0_HTTPS,) | |||||
USER_REPO1 = "ArangoDB-Community/arangodb-graphql-java" | |||||
GIT_REPO_URL1_HTTPS = f"https://github.com/{USER_REPO1}" | |||||
GIT_REPO_URL1_GIT = f"git://github.com/{USER_REPO1}.git" | |||||
GIT_REPO_URL1_API = f"https://api.github.com/repos/{USER_REPO1}" | |||||
LIST_GIT_INCR = (GIT_REPO_URL1_HTTPS,) | |||||
LIST_SRC = (MVN_URL + "al/aldi/sprova4j",) | LIST_SRC = (MVN_URL + "al/aldi/sprova4j",) | ||||
LIST_SRC_DATA = ( | LIST_SRC_DATA = ( | ||||
{ | { | ||||
"type": "maven", | "type": "maven", | ||||
"url": "https://repo1.maven.org/maven2/al/aldi/sprova4j" | "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j" | ||||
+ "/0.1.0/sprova4j-0.1.0-sources.jar", | + "/0.1.0/sprova4j-0.1.0-sources.jar", | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | def maven_pom_2(datadir) -> bytes: | ||||
return Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_bytes() | return Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_bytes() | ||||
@pytest.fixture
def maven_pom_3(datadir) -> bytes:
    """Return the raw bytes of the ``arangodb-graphql-1.2.pom`` test resource.

    The file is read from the ``https_maven.org`` sub-directory of ``datadir``.
    """
    pom_path = Path(datadir) / "https_maven.org" / "arangodb-graphql-1.2.pom"
    return pom_path.read_bytes()
@pytest.fixture
def requests_mock(requests_mock):
    """Extend the inherited ``requests_mock`` fixture with GitHub API stubs.

    A GET on the API url of either scm repository used by these tests is
    answered with a JSON document whose ``html_url`` field holds that
    repository's https url.
    """
    canonical_url_by_api_url = {
        GIT_REPO_URL0_API: GIT_REPO_URL0_HTTPS,
        GIT_REPO_URL1_API: GIT_REPO_URL1_HTTPS,
    }
    for api_url, canonical_url in canonical_url_by_api_url.items():
        requests_mock.get(api_url, json={"html_url": canonical_url})

    # Hand the same (now pre-configured) mocker back to the requesting test.
    yield requests_mock
@pytest.fixture(autouse=True) | @pytest.fixture(autouse=True) | ||||
def network_requests_mock( | def network_requests_mock( | ||||
requests_mock, maven_index_full, maven_pom_1, maven_pom_2, maven_pom_3 | requests_mock, maven_index_full, maven_pom_1, maven_pom_2, maven_pom_3 | ||||
): | ): | ||||
requests_mock.get(INDEX_URL, content=maven_index_full) | requests_mock.get(INDEX_URL, content=maven_index_full) | ||||
requests_mock.get(URL_POM_1, content=maven_pom_1) | requests_mock.get(URL_POM_1, content=maven_pom_1) | ||||
requests_mock.get(URL_POM_2, content=maven_pom_2) | requests_mock.get(URL_POM_2, content=maven_pom_2) | ||||
requests_mock.get(URL_POM_3, content=maven_pom_3) | requests_mock.get(URL_POM_3, content=maven_pom_3) | ||||
Show All 16 Lines | def test_maven_full_listing(swh_scheduler): | ||||
# Start test checks. | # Start test checks. | ||||
assert stats.pages == 5 | assert stats.pages == 5 | ||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | ||||
origin_urls = [origin.url for origin in scheduler_origins] | origin_urls = [origin.url for origin in scheduler_origins] | ||||
# 3 git origins + 1 maven origin with 2 releases (one per jar) | # 2 git origins + 1 maven origin with 2 releases (one per jar) | ||||
assert len(origin_urls) == 4 | assert len(origin_urls) == 3 | ||||
assert sorted(origin_urls) == sorted(LIST_GIT + LIST_GIT_INCR + LIST_SRC) | assert sorted(origin_urls) == sorted(LIST_GIT + LIST_GIT_INCR + LIST_SRC) | ||||
for origin in scheduler_origins: | for origin in scheduler_origins: | ||||
if origin.visit_type == "maven": | if origin.visit_type == "maven": | ||||
for src in LIST_SRC_DATA: | for src in LIST_SRC_DATA: | ||||
last_update_src = iso8601.parse_date(src["time"]) | last_update_src = iso8601.parse_date(src["time"]) | ||||
assert last_update_src <= origin.last_update | assert last_update_src <= origin.last_update | ||||
assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) | assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) | ||||
Show All 29 Lines | ): | ||||
# Start test checks. | # Start test checks. | ||||
assert stats.pages == 5 | assert stats.pages == 5 | ||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | ||||
origin_urls = [origin.url for origin in scheduler_origins] | origin_urls = [origin.url for origin in scheduler_origins] | ||||
# 2 git origins + 1 maven origin with 2 releases (one per jar) | # 2 git origins + 1 maven origin with 2 releases (one per jar) | ||||
assert len(origin_urls) == 3 | assert len(origin_urls) == 3 | ||||
assert sorted(origin_urls) == sorted((LIST_GIT[1],) + LIST_GIT_INCR + LIST_SRC) | assert sorted(origin_urls) == sorted(LIST_GIT + LIST_GIT_INCR + LIST_SRC) | ||||
for origin in scheduler_origins: | for origin in scheduler_origins: | ||||
if origin.visit_type == "maven": | if origin.visit_type == "maven": | ||||
for src in LIST_SRC_DATA: | for src in LIST_SRC_DATA: | ||||
last_update_src = iso8601.parse_date(src["time"]) | last_update_src = iso8601.parse_date(src["time"]) | ||||
assert last_update_src <= origin.last_update | assert last_update_src <= origin.last_update | ||||
assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) | assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) | ||||
Show All 31 Lines | ): | ||||
assert lister.updated | assert lister.updated | ||||
assert stats.pages == 2 | assert stats.pages == 2 | ||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | ||||
origin_urls = [origin.url for origin in scheduler_origins] | origin_urls = [origin.url for origin in scheduler_origins] | ||||
# 1 git origin + 1 maven origin with 1 release (one per jar) | # 1 git origin + 1 maven origin with 1 release (one per jar) | ||||
assert len(origin_urls) == 2 | assert len(origin_urls) == 2 | ||||
assert sorted(origin_urls) == sorted((LIST_GIT[0],) + LIST_SRC) | assert sorted(origin_urls) == sorted(LIST_GIT + LIST_SRC) | ||||
for origin in scheduler_origins: | for origin in scheduler_origins: | ||||
if origin.visit_type == "maven": | if origin.visit_type == "maven": | ||||
last_update_src = iso8601.parse_date(LIST_SRC_DATA[0]["time"]) | last_update_src = iso8601.parse_date(LIST_SRC_DATA[0]["time"]) | ||||
assert last_update_src == origin.last_update | assert last_update_src == origin.last_update | ||||
assert origin.extra_loader_arguments["artifacts"] == [LIST_SRC_DATA[0]] | assert origin.extra_loader_arguments["artifacts"] == [LIST_SRC_DATA[0]] | ||||
# Second execution of the lister, incremental mode | # Second execution of the lister, incremental mode | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | ): | ||||
lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) | lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) | ||||
# on artifacts though, that raises but continues listing | # on artifacts though, that raises but continues listing | ||||
lister.run() | lister.run() | ||||
# If the maven_index_full step succeeded but not the get_pom step, | # If the maven_index_full step succeeded but not the get_pom step, | ||||
# then we get only one maven-jar origin and one git origin. | # then we get only one maven-jar origin and one git origin. | ||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | ||||
assert len(scheduler_origins) == 3 | assert len(scheduler_origins) == 2 | ||||
def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_mtime): | def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_mtime): | ||||
requests_mock.get(INDEX_URL, content=maven_index_null_mtime) | requests_mock.get(INDEX_URL, content=maven_index_null_mtime) | ||||
# Run the lister. | # Run the lister. | ||||
lister = MavenLister( | lister = MavenLister( | ||||
Show All 20 Lines | def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock, maven_pom_1): | ||||
lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) | lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) | ||||
lister.run() | lister.run() | ||||
# If the maven_index_full step succeeded but not the pom parsing step, | # If the maven_index_full step succeeded but not the pom parsing step, | ||||
# then we get only one maven-jar origin and one git origin. | # then we get only one maven-jar origin and one git origin. | ||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results | ||||
assert len(scheduler_origins) == 3 | assert len(scheduler_origins) == 2 |