Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/tests/test_loader.py
Show First 20 Lines • Show All 632 Lines • ▼ Show 20 Lines | def test_content_loader_ok_simple(swh_storage, requests_mock_datadir, content_path): | ||||
) | ) | ||||
assert visit_status.snapshot is not None | assert visit_status.snapshot is not None | ||||
result2 = loader.load() | result2 = loader.load() | ||||
assert result2 == {"status": "uneventful"} | assert result2 == {"status": "uneventful"} | ||||
def test_content_loader_hash_mismatch(swh_storage, requests_mock_datadir, content_path): | |||||
"""It should fail the visit when the content checksums do not match""" | |||||
checksums = compute_hashes(content_path, ["sha1", "sha256", "sha512"]) | |||||
erratic_checksums = { | |||||
algo: chksum.replace("a", "e") # alter checksums to fail integrity check | |||||
for algo, chksum in checksums.items() | |||||
} | |||||
origin = Origin(CONTENT_URL) | |||||
loader = ContentLoader( | |||||
swh_storage, | |||||
origin.url, | |||||
checksums=erratic_checksums, | |||||
) | |||||
result = loader.load() | |||||
assert result == {"status": "failed"} | |||||
assert_last_visit_matches(swh_storage, origin.url, status="failed", type="content") | |||||
DIRECTORY_MIRROR = "https://example.org" | DIRECTORY_MIRROR = "https://example.org" | ||||
DIRECTORY_URL = f"{DIRECTORY_MIRROR}/archives/dummy-hello.tar.gz" | DIRECTORY_URL = f"{DIRECTORY_MIRROR}/archives/dummy-hello.tar.gz" | ||||
def test_directory_loader_missing_field(swh_storage): | def test_directory_loader_missing_field(swh_storage): | ||||
"""It should raise if the DirectoryLoader is missing checksums field""" | """It should raise if the DirectoryLoader is missing checksums field""" | ||||
origin = Origin(DIRECTORY_URL) | origin = Origin(DIRECTORY_URL) | ||||
with pytest.raises(TypeError, match="missing"): | with pytest.raises(TypeError, match="missing"): | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | _check_load_failure( | ||||
loader, | loader, | ||||
NotFound, | NotFound, | ||||
"Unknown origin", | "Unknown origin", | ||||
status="not_found", | status="not_found", | ||||
origin=unknown_origin, | origin=unknown_origin, | ||||
) | ) | ||||
def test_directory_loader_404_with_integrity_check_failure( | def test_directory_loader_hash_mismatch( | ||||
caplog, swh_storage, requests_mock_datadir, tarball_with_std_hashes | caplog, swh_storage, requests_mock_datadir, tarball_with_std_hashes | ||||
): | ): | ||||
"""It should not ingest tarball with mismatched checksum""" | """It should not ingest tarball with mismatched checksum""" | ||||
tarball_path, checksums = tarball_with_std_hashes | tarball_path, checksums = tarball_with_std_hashes | ||||
origin = Origin(DIRECTORY_URL) | origin = Origin(DIRECTORY_URL) | ||||
erratic_checksums = { | erratic_checksums = { | ||||
algo: chksum.replace("a", "e") # alter checksums to fail integrity check | algo: chksum.replace("a", "e") # alter checksums to fail integrity check | ||||
for algo, chksum in checksums.items() | for algo, chksum in checksums.items() | ||||
} | } | ||||
loader = DirectoryLoader( | loader = DirectoryLoader( | ||||
swh_storage, | swh_storage, | ||||
origin.url, | origin.url, | ||||
checksums=erratic_checksums, # making the integrity check fail | checksums=erratic_checksums, # making the integrity check fail | ||||
) | ) | ||||
result = loader.load() | result = loader.load() | ||||
assert result == {"status": "uneventful"} | assert result == {"status": "failed"} | ||||
_check_load_failure( | _check_load_failure( | ||||
caplog, | caplog, | ||||
loader, | loader, | ||||
NotFound, | ValueError, | ||||
"Unknown origin", | "mismatched", | ||||
status="not_found", | status="failed", | ||||
origin=origin, | |||||
) | |||||
@pytest.mark.skipif(nix_store_missing, reason="requires nix-bin installed (bullseye)") | |||||
def test_directory_loader_hash_mismatch_nar( | |||||
caplog, swh_storage, requests_mock_datadir, tarball_with_nar_hashes | |||||
): | |||||
"""It should not ingest tarball with mismatched nar checksum""" | |||||
tarball_path, checksums = tarball_with_nar_hashes | |||||
origin = Origin(DIRECTORY_URL) | |||||
erratic_checksums = { | |||||
algo: chksum.replace("a", "e") # alter checksums to fail integrity check | |||||
for algo, chksum in checksums.items() | |||||
} | |||||
loader = DirectoryLoader( | |||||
swh_storage, | |||||
origin.url, | |||||
checksums=erratic_checksums, # making the integrity check fail | |||||
checksums_computation="nar", | |||||
) | |||||
result = loader.load() | |||||
assert result == {"status": "failed"} | |||||
_check_load_failure( | |||||
caplog, | |||||
loader, | |||||
ValueError, | |||||
"mismatched", | |||||
status="failed", | |||||
origin=origin, | origin=origin, | ||||
) | ) | ||||
@pytest.mark.parametrize("checksum_algo", ["sha1", "sha256", "sha512"]) | @pytest.mark.parametrize("checksum_algo", ["sha1", "sha256", "sha512"]) | ||||
def test_directory_loader_ok_with_fallback( | def test_directory_loader_ok_with_fallback( | ||||
caplog, swh_storage, requests_mock_datadir, tarball_with_std_hashes, checksum_algo | caplog, swh_storage, requests_mock_datadir, tarball_with_std_hashes, checksum_algo | ||||
): | ): | ||||
▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines |