Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/pypi/tests/test_pypi.py
Show All 22 Lines | from swh.loader.package.pypi.loader import ( | ||||
extract_intrinsic_metadata, | extract_intrinsic_metadata, | ||||
artifact_to_revision_id, | artifact_to_revision_id, | ||||
) | ) | ||||
from swh.loader.package.tests.common import ( | from swh.loader.package.tests.common import ( | ||||
check_snapshot, | check_snapshot, | ||||
check_metadata_paths, | check_metadata_paths, | ||||
get_stats, | get_stats, | ||||
) | ) | ||||
from swh.loader.tests.common import assert_last_visit_ok | |||||
def test_author_basic(): | def test_author_basic(): | ||||
data = { | data = { | ||||
"author": "i-am-groot", | "author": "i-am-groot", | ||||
"author_email": "iam@groot.org", | "author_email": "iam@groot.org", | ||||
} | } | ||||
actual_author = author(data) | actual_author = author(data) | ||||
▲ Show 20 Lines • Show All 182 Lines • ▼ Show 20 Lines | assert { | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 0, | "person": 0, | ||||
"release": 0, | "release": 0, | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="partial", type="pypi") | ||||
assert origin_visit["status"] == "partial" | |||||
assert origin_visit["type"] == "pypi" | |||||
# problem during loading: | # problem during loading: | ||||
# {visit: partial, status: uneventful, no snapshot} | # {visit: partial, status: uneventful, no snapshot} | ||||
def test_release_with_traceback(swh_config, requests_mock_datadir): | def test_release_with_traceback(swh_config, requests_mock_datadir): | ||||
url = "https://pypi.org/project/0805nexter" | url = "https://pypi.org/project/0805nexter" | ||||
Show All 15 Lines | ): | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 0, | "person": 0, | ||||
"release": 0, | "release": 0, | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 0, | "snapshot": 0, | ||||
} == stats | } == stats | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="partial", type="pypi") | ||||
assert origin_visit["status"] == "partial" | |||||
assert origin_visit["type"] == "pypi" | |||||
# problem during loading: failure early enough in between swh contents... | # problem during loading: failure early enough in between swh contents... | ||||
# some contents (contents, directories, etc...) have been written in storage | # some contents (contents, directories, etc...) have been written in storage | ||||
# {visit: partial, status: eventful, no snapshot} | # {visit: partial, status: eventful, no snapshot} | ||||
# problem during loading: failure late enough we can have snapshots (some | # problem during loading: failure late enough we can have snapshots (some | ||||
# revisions are written in storage already) | # revisions are written in storage already) | ||||
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | def test_visit_with_missing_artifact(swh_config, requests_mock_datadir_missing_one): | ||||
} | } | ||||
expected_snapshot = { | expected_snapshot = { | ||||
"id": expected_snapshot_id, | "id": expected_snapshot_id, | ||||
"branches": expected_branches, | "branches": expected_branches, | ||||
} | } | ||||
check_snapshot(expected_snapshot, storage=loader.storage) | check_snapshot(expected_snapshot, storage=loader.storage) | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="partial", type="pypi") | ||||
assert origin_visit["status"] == "partial" | |||||
assert origin_visit["type"] == "pypi" | |||||
def test_visit_with_1_release_artifact(swh_config, requests_mock_datadir): | def test_visit_with_1_release_artifact(swh_config, requests_mock_datadir): | ||||
"""With no prior visit, load a pypi project ends up with 1 snapshot | """With no prior visit, load a pypi project ends up with 1 snapshot | ||||
""" | """ | ||||
url = "https://pypi.org/project/0805nexter" | url = "https://pypi.org/project/0805nexter" | ||||
loader = PyPILoader(url) | loader = PyPILoader(url) | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | def test_visit_with_1_release_artifact(swh_config, requests_mock_datadir): | ||||
} | } | ||||
expected_snapshot = { | expected_snapshot = { | ||||
"id": expected_snapshot_id, | "id": expected_snapshot_id, | ||||
"branches": expected_branches, | "branches": expected_branches, | ||||
} | } | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="pypi") | ||||
assert origin_visit["status"] == "full" | |||||
assert origin_visit["type"] == "pypi" | |||||
def test_multiple_visits_with_no_change(swh_config, requests_mock_datadir): | def test_multiple_visits_with_no_change(swh_config, requests_mock_datadir): | ||||
"""Multiple visits with no changes results in 1 same snapshot | """Multiple visits with no changes results in 1 same snapshot | ||||
""" | """ | ||||
url = "https://pypi.org/project/0805nexter" | url = "https://pypi.org/project/0805nexter" | ||||
loader = PyPILoader(url) | loader = PyPILoader(url) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
snapshot_id = "ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a" | snapshot_id = "ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": snapshot_id, | "snapshot_id": snapshot_id, | ||||
} | } | ||||
assert_last_visit_ok(loader.storage, url, status="full", type="pypi") | |||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 6, | "content": 6, | ||||
"directory": 4, | "directory": 4, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
Show All 17 Lines | def test_multiple_visits_with_no_change(swh_config, requests_mock_datadir): | ||||
} | } | ||||
expected_snapshot = { | expected_snapshot = { | ||||
"id": snapshot_id, | "id": snapshot_id, | ||||
"branches": expected_branches, | "branches": expected_branches, | ||||
} | } | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | |||||
assert origin_visit["status"] == "full" | |||||
assert origin_visit["type"] == "pypi" | |||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2 == { | assert actual_load_status2 == { | ||||
"status": "uneventful", | "status": "uneventful", | ||||
"snapshot_id": actual_load_status2["snapshot_id"], | "snapshot_id": actual_load_status2["snapshot_id"], | ||||
} | } | ||||
visit_status2 = assert_last_visit_ok( | |||||
loader.storage, url, status="full", type="pypi" | |||||
) | |||||
stats2 = get_stats(loader.storage) | stats2 = get_stats(loader.storage) | ||||
expected_stats2 = stats.copy() | expected_stats2 = stats.copy() | ||||
expected_stats2["origin_visit"] = 1 + 1 | expected_stats2["origin_visit"] = 1 + 1 | ||||
assert expected_stats2 == stats2 | assert expected_stats2 == stats2 | ||||
# same snapshot | # same snapshot | ||||
actual_snapshot_id = origin_visit["snapshot"] | actual_snapshot_id = visit_status2.snapshot | ||||
assert actual_snapshot_id == hash_to_bytes(snapshot_id) | assert actual_snapshot_id == hash_to_bytes(snapshot_id) | ||||
def test_incremental_visit(swh_config, requests_mock_datadir_visits): | def test_incremental_visit(swh_config, requests_mock_datadir_visits): | ||||
"""With prior visit, 2nd load will result with a different snapshot | """With prior visit, 2nd load will result with a different snapshot | ||||
""" | """ | ||||
url = "https://pypi.org/project/0805nexter" | url = "https://pypi.org/project/0805nexter" | ||||
loader = PyPILoader(url) | loader = PyPILoader(url) | ||||
visit1_actual_load_status = loader.load() | visit1_actual_load_status = loader.load() | ||||
visit1_stats = get_stats(loader.storage) | visit1_stats = get_stats(loader.storage) | ||||
expected_snapshot_id = "ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a" | expected_snapshot_id = "ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a" | ||||
assert visit1_actual_load_status == { | assert visit1_actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
origin_visit1 = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="pypi") | ||||
assert origin_visit1["status"] == "full" | |||||
assert origin_visit1["type"] == "pypi" | |||||
assert { | assert { | ||||
"content": 6, | "content": 6, | ||||
"directory": 4, | "directory": 4, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 1, | "person": 1, | ||||
"release": 0, | "release": 0, | ||||
Show All 10 Lines | def test_incremental_visit(swh_config, requests_mock_datadir_visits): | ||||
assert visit2_actual_load_status["status"] == "eventful" | assert visit2_actual_load_status["status"] == "eventful" | ||||
expected_snapshot_id2 = "2e5149a7b0725d18231a37b342e9b7c4e121f283" | expected_snapshot_id2 = "2e5149a7b0725d18231a37b342e9b7c4e121f283" | ||||
assert visit2_actual_load_status == { | assert visit2_actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id2, | "snapshot_id": expected_snapshot_id2, | ||||
} | } | ||||
visits = list(loader.storage.origin_visit_get(url)) | assert_last_visit_ok(loader.storage, url, status="full", type="pypi") | ||||
assert len(visits) == 2 | |||||
assert visits[1]["status"] == "full" | |||||
assert visits[1]["type"] == "pypi" | |||||
assert { | assert { | ||||
"content": 6 + 1, # 1 more content | "content": 6 + 1, # 1 more content | ||||
"directory": 4 + 2, # 2 more directories | "directory": 4 + 2, # 2 more directories | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1 + 1, | "origin_visit": 1 + 1, | ||||
"person": 1, | "person": 1, | ||||
"release": 0, | "release": 0, | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | def test_incremental_visit(swh_config, requests_mock_datadir_visits): | ||||
} | } | ||||
expected_snapshot = { | expected_snapshot = { | ||||
"id": expected_snapshot_id2, | "id": expected_snapshot_id2, | ||||
"branches": expected_branches, | "branches": expected_branches, | ||||
} | } | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="pypi") | ||||
assert origin_visit["status"] == "full" | |||||
assert origin_visit["type"] == "pypi" | |||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir_visits.request_history | for m in requests_mock_datadir_visits.request_history | ||||
if m.url.startswith("https://files.pythonhosted.org") | if m.url.startswith("https://files.pythonhosted.org") | ||||
] | ] | ||||
# visited each artifact once across 2 visits | # visited each artifact once across 2 visits | ||||
assert len(urls) == len(set(urls)) | assert len(urls) == len(set(urls)) | ||||
Show All 36 Lines | def test_visit_1_release_with_2_artifacts(swh_config, requests_mock_datadir): | ||||
} | } | ||||
expected_snapshot = { | expected_snapshot = { | ||||
"id": expected_snapshot_id, | "id": expected_snapshot_id, | ||||
"branches": expected_branches, | "branches": expected_branches, | ||||
} | } | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="pypi") | ||||
assert origin_visit["status"] == "full" | |||||
assert origin_visit["type"] == "pypi" | |||||
def test_pypi_artifact_to_revision_id_none(): | def test_pypi_artifact_to_revision_id_none(): | ||||
"""Current loader version should stop soon if nothing can be found | """Current loader version should stop soon if nothing can be found | ||||
""" | """ | ||||
artifact_metadata = { | artifact_metadata = { | ||||
"digests": { | "digests": { | ||||
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
# no branch as one artifact without any intrinsic metadata | # no branch as one artifact without any intrinsic metadata | ||||
expected_snapshot = {"id": expected_snapshot_id, "branches": {}} | expected_snapshot = {"id": expected_snapshot_id, "branches": {}} | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
origin_visit = loader.storage.origin_visit_get_latest(url) | assert_last_visit_ok(loader.storage, url, status="full", type="pypi") | ||||
assert origin_visit["status"] == "full" | |||||
assert origin_visit["type"] == "pypi" |