Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/nixguix/tests/test_nixguix.py
Show All 16 Lines | |||||
from swh.model.model import Snapshot | from swh.model.model import Snapshot | ||||
from swh.loader.package.archive.loader import ArchiveLoader | from swh.loader.package.archive.loader import ArchiveLoader | ||||
from swh.loader.package.nixguix.loader import ( | from swh.loader.package.nixguix.loader import ( | ||||
NixGuixLoader, | NixGuixLoader, | ||||
retrieve_sources, | retrieve_sources, | ||||
clean_sources, | clean_sources, | ||||
) | ) | ||||
from swh.loader.package.tests.common import get_stats, check_snapshot | from swh.loader.package.tests.common import ( | ||||
assert_last_visit_ok, | |||||
get_stats, | |||||
check_snapshot, | |||||
) | |||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.storage.exc import HashCollision | from swh.storage.exc import HashCollision | ||||
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json" | sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json" | ||||
def test_retrieve_sources(swh_config, requests_mock_datadir): | def test_retrieve_sources(swh_config, requests_mock_datadir): | ||||
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | assert { | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 1, | "person": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, | "revision": 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
origin_visit = loader.storage.origin_visit_get_latest(sources_url) | |||||
# The visit is partial because urls pointing to non tarball files | # The visit is partial because urls pointing to non tarball files | ||||
# are not handled yet | # are not handled yet | ||||
assert origin_visit["status"] == "partial" | assert_last_visit_ok(loader.storage, sources_url, status="partial", type="nixguix") | ||||
assert origin_visit["type"] == "nixguix" | |||||
def test_uncompress_failure(swh_config, requests_mock_datadir): | def test_uncompress_failure(swh_config, requests_mock_datadir): | ||||
"""Non tarball files are currently not supported and the uncompress | """Non tarball files are currently not supported and the uncompress | ||||
function fails on such kind of files. | function fails on such kind of files. | ||||
However, even in this case of failure (because of the url | However, even in this case of failure (because of the url | ||||
https://example.com/file.txt), a snapshot and a visit have to be | https://example.com/file.txt), a snapshot and a visit have to be | ||||
created (with a partial status, since not all files are archived). | created (with a partial status, since not all files are archived). | ||||
""" | """ | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(sources_url) | ||||
loader_status = loader.load() | loader_status = loader.load() | ||||
urls = [s["urls"][0] for s in loader.sources] | urls = [s["urls"][0] for s in loader.sources] | ||||
assert "https://example.com/file.txt" in urls | assert "https://example.com/file.txt" in urls | ||||
assert loader_status["status"] == "eventful" | assert loader_status["status"] == "eventful" | ||||
origin_visit = loader.storage.origin_visit_get_latest(sources_url) | |||||
# The visit is partial because urls pointing to non tarball files | # The visit is partial because urls pointing to non tarball files | ||||
# are not handled yet | # are not handled yet | ||||
assert origin_visit["status"] == "partial" | assert_last_visit_ok(loader.storage, sources_url, status="partial", type="nixguix") | ||||
def test_loader_incremental(swh_config, requests_mock_datadir): | def test_loader_incremental(swh_config, requests_mock_datadir): | ||||
"""Ensure a second visit do not download artifact already | """Ensure a second visit do not download artifact already | ||||
downloaded by the previous visit. | downloaded by the previous visit. | ||||
""" | """ | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(sources_url) | ||||
load_status = loader.load() | load_status = loader.load() | ||||
loader.load() | loader.load() | ||||
expected_snapshot_id = "0c5881c74283793ebe9a09a105a9381e41380383" | expected_snapshot_id = "0c5881c74283793ebe9a09a105a9381e41380383" | ||||
assert load_status == {"status": "eventful", "snapshot_id": expected_snapshot_id} | assert load_status == {"status": "eventful", "snapshot_id": expected_snapshot_id} | ||||
assert_last_visit_ok(loader.storage, sources_url, status="partial", type="nixguix") | |||||
expected_branches = { | expected_branches = { | ||||
"evaluation": { | "evaluation": { | ||||
"target": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7", | "target": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7", | ||||
"target_type": "revision", | "target_type": "revision", | ||||
}, | }, | ||||
"https://github.com/owner-1/repository-1/revision-1.tgz": { | "https://github.com/owner-1/repository-1/revision-1.tgz": { | ||||
"target": "488ad4e7b8e2511258725063cf43a2b897c503b4", | "target": "488ad4e7b8e2511258725063cf43a2b897c503b4", | ||||
"target_type": "revision", | "target_type": "revision", | ||||
Show All 26 Lines | def test_loader_two_visits(swh_config, requests_mock_datadir_visits): | ||||
another tarball. | another tarball. | ||||
""" | """ | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(sources_url) | ||||
load_status = loader.load() | load_status = loader.load() | ||||
expected_snapshot_id = "0c5881c74283793ebe9a09a105a9381e41380383" | expected_snapshot_id = "0c5881c74283793ebe9a09a105a9381e41380383" | ||||
assert load_status == {"status": "eventful", "snapshot_id": expected_snapshot_id} | assert load_status == {"status": "eventful", "snapshot_id": expected_snapshot_id} | ||||
assert_last_visit_ok(loader.storage, sources_url, status="partial", type="nixguix") | |||||
expected_branches = { | expected_branches = { | ||||
"evaluation": { | "evaluation": { | ||||
"target": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7", | "target": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7", | ||||
"target_type": "revision", | "target_type": "revision", | ||||
}, | }, | ||||
"https://github.com/owner-1/repository-1/revision-1.tgz": { | "https://github.com/owner-1/repository-1/revision-1.tgz": { | ||||
"target": "488ad4e7b8e2511258725063cf43a2b897c503b4", | "target": "488ad4e7b8e2511258725063cf43a2b897c503b4", | ||||
"target_type": "revision", | "target_type": "revision", | ||||
Show All 20 Lines | assert { | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(sources_url) | ||||
load_status = loader.load() | load_status = loader.load() | ||||
expected_snapshot_id = "b0bfa75cbd0cc90aac3b9e95fb0f59c731176d97" | expected_snapshot_id = "b0bfa75cbd0cc90aac3b9e95fb0f59c731176d97" | ||||
assert load_status == {"status": "eventful", "snapshot_id": expected_snapshot_id} | assert load_status == {"status": "eventful", "snapshot_id": expected_snapshot_id} | ||||
assert_last_visit_ok(loader.storage, sources_url, status="partial", type="nixguix") | |||||
# This ensures visits are incremental. Indeed, if we request a | # This ensures visits are incremental. Indeed, if we request a | ||||
# second time an url, because of the requests_mock_datadir_visits | # second time an url, because of the requests_mock_datadir_visits | ||||
# fixture, the file has to end with `_visit1`. | # fixture, the file has to end with `_visit1`. | ||||
expected_branches = { | expected_branches = { | ||||
"evaluation": { | "evaluation": { | ||||
"target": "602140776b2ce6c9159bcf52ada73a297c063d5e", | "target": "602140776b2ce6c9159bcf52ada73a297c063d5e", | ||||
"target_type": "revision", | "target_type": "revision", | ||||
}, | }, | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | def test_resolve_revision_from(swh_config, requests_mock_datadir): | ||||
assert loader.resolve_revision_from(known_artifacts, metadata) == None # noqa | assert loader.resolve_revision_from(known_artifacts, metadata) == None # noqa | ||||
def test_evaluation_branch(swh_config, requests_mock_datadir): | def test_evaluation_branch(swh_config, requests_mock_datadir): | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(sources_url) | ||||
res = loader.load() | res = loader.load() | ||||
assert res["status"] == "eventful" | assert res["status"] == "eventful" | ||||
assert_last_visit_ok(loader.storage, sources_url, status="partial", type="nixguix") | |||||
expected_branches = { | expected_branches = { | ||||
"https://github.com/owner-1/repository-1/revision-1.tgz": { | "https://github.com/owner-1/repository-1/revision-1.tgz": { | ||||
"target": "488ad4e7b8e2511258725063cf43a2b897c503b4", | "target": "488ad4e7b8e2511258725063cf43a2b897c503b4", | ||||
"target_type": "revision", | "target_type": "revision", | ||||
}, | }, | ||||
"evaluation": { | "evaluation": { | ||||
"target": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7", | "target": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7", | ||||
"target_type": "revision", | "target_type": "revision", | ||||
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | expected_snapshot = { | ||||
"branches": expected_branches, | "branches": expected_branches, | ||||
} | } | ||||
snapshot = check_snapshot(expected_snapshot, storage=loader.storage) | snapshot = check_snapshot(expected_snapshot, storage=loader.storage) | ||||
check_snapshot_revisions_ok(snapshot, loader.storage) | check_snapshot_revisions_ok(snapshot, loader.storage) | ||||
assert len(mock_download.mock_calls) == 2 | assert len(mock_download.mock_calls) == 2 | ||||
origin_visit = loader.storage.origin_visit_get_latest(sources_url) | # The visit is partial because some artifact downloads failed | ||||
assert_last_visit_ok(loader.storage, sources_url, status="partial", type="nixguix") | |||||
# The visit is partial because some hash collision were detected | |||||
assert origin_visit["status"] == "partial" | |||||
assert origin_visit["type"] == "nixguix" | |||||
def test_load_nixguix_one_common_artifact_from_other_loader( | def test_load_nixguix_one_common_artifact_from_other_loader( | ||||
swh_config, datadir, requests_mock_datadir_visits, caplog | swh_config, datadir, requests_mock_datadir_visits, caplog | ||||
): | ): | ||||
"""Misformatted revision should be caught and logged, then loading continues | """Misformatted revision should be caught and logged, then loading continues | ||||
""" | """ | ||||
Show All 13 Lines | gnu_artifacts = [ | ||||
} | } | ||||
] | ] | ||||
archive_loader = ArchiveLoader(url=gnu_url, artifacts=gnu_artifacts) | archive_loader = ArchiveLoader(url=gnu_url, artifacts=gnu_artifacts) | ||||
actual_load_status = archive_loader.load() | actual_load_status = archive_loader.load() | ||||
expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" | expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa | assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa | ||||
assert_last_visit_ok(archive_loader.storage, gnu_url, status="full", type="tar") | |||||
gnu_snapshot = archive_loader.storage.snapshot_get( | gnu_snapshot = archive_loader.storage.snapshot_get( | ||||
hash_to_bytes(expected_snapshot_id) | hash_to_bytes(expected_snapshot_id) | ||||
) | ) | ||||
first_revision = gnu_snapshot["branches"][f"releases/{release}".encode("utf-8")] | first_revision = gnu_snapshot["branches"][f"releases/{release}".encode("utf-8")] | ||||
# 2. Then ingest with the nixguix loader which lists the same artifact within its | # 2. Then ingest with the nixguix loader which lists the same artifact within its | ||||
# sources.json | # sources.json | ||||
Show All 12 Lines | for source in all_sources["sources"]: | ||||
), f"test setup error: {artifact_url} must be in {data_sources}" | ), f"test setup error: {artifact_url} must be in {data_sources}" | ||||
# first visit with a snapshot, ok | # first visit with a snapshot, ok | ||||
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources_special.json" | sources_url = "https://nix-community.github.io/nixpkgs-swh/sources_special.json" | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(sources_url) | ||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "eventful" | assert actual_load_status2["status"] == "eventful" | ||||
assert_last_visit_ok(loader.storage, sources_url, status="full", type="nixguix") | |||||
snapshot_id = actual_load_status2["snapshot_id"] | snapshot_id = actual_load_status2["snapshot_id"] | ||||
snapshot = loader.storage.snapshot_get(hash_to_bytes(snapshot_id)) | snapshot = loader.storage.snapshot_get(hash_to_bytes(snapshot_id)) | ||||
snapshot.pop("next_branch") # snapshot_get endpoint detail to drop | snapshot.pop("next_branch") # snapshot_get endpoint detail to drop | ||||
# simulate a snapshot already seen with a revision with the wrong metadata structure | # simulate a snapshot already seen with a revision with the wrong metadata structure | ||||
# This revision should be skipped, thus making the artifact being ingested again. | # This revision should be skipped, thus making the artifact being ingested again. | ||||
with patch( | with patch( | ||||
"swh.loader.package.loader.PackageLoader.last_snapshot" | "swh.loader.package.loader.PackageLoader.last_snapshot" | ||||
Show All 14 Lines | ) as last_snapshot: | ||||
# a revision written by somebody else (structure different) | # a revision written by somebody else (structure different) | ||||
last_snapshot.return_value = Snapshot.from_dict(snapshot) | last_snapshot.return_value = Snapshot.from_dict(snapshot) | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(sources_url) | ||||
actual_load_status3 = loader.load() | actual_load_status3 = loader.load() | ||||
assert last_snapshot.called | assert last_snapshot.called | ||||
assert actual_load_status3["status"] == "eventful" | assert actual_load_status3["status"] == "eventful" | ||||
assert_last_visit_ok(loader.storage, sources_url, status="full", type="nixguix") | |||||
new_snapshot_id = "32ff641e510aceefc3a6d0dcbf208b2854d2e965" | new_snapshot_id = "32ff641e510aceefc3a6d0dcbf208b2854d2e965" | ||||
assert actual_load_status3["snapshot_id"] == new_snapshot_id | assert actual_load_status3["snapshot_id"] == new_snapshot_id | ||||
last_snapshot = loader.storage.snapshot_get(hash_to_bytes(new_snapshot_id)) | last_snapshot = loader.storage.snapshot_get(hash_to_bytes(new_snapshot_id)) | ||||
new_revision_branch = last_snapshot["branches"][artifact_url.encode("utf-8")] | new_revision_branch = last_snapshot["branches"][artifact_url.encode("utf-8")] | ||||
assert new_revision_branch["target_type"] == "revision" | assert new_revision_branch["target_type"] == "revision" | ||||
new_revision = next( | new_revision = next( | ||||
Show All 24 Lines |