Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/nixguix/tests/test_nixguix.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Dict, List, Optional, Tuple | from typing import Dict, Optional, Tuple | ||||
from unittest.mock import patch | |||||
import attr | |||||
import pytest | import pytest | ||||
from swh.loader.package import __version__ | from swh.loader.package import __version__ | ||||
from swh.loader.package.archive.loader import ArchiveLoader | from swh.loader.package.archive.loader import ArchiveLoader | ||||
from swh.loader.package.nixguix.loader import ( | from swh.loader.package.nixguix.loader import ( | ||||
NixGuixLoader, | NixGuixLoader, | ||||
NixGuixPackageInfo, | |||||
clean_sources, | clean_sources, | ||||
make_pattern_unsupported_file_extension, | make_pattern_unsupported_file_extension, | ||||
parse_sources, | parse_sources, | ||||
retrieve_sources, | retrieve_sources, | ||||
) | ) | ||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
from swh.loader.tests import assert_last_visit_matches | from swh.loader.tests import assert_last_visit_matches | ||||
from swh.loader.tests import check_snapshot as check_snapshot_full | from swh.loader.tests import check_snapshot as check_snapshot_full | ||||
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | for name, branch in snapshot.branches.items(): | ||||
continue # skipping that particular branch (cf. previous comment) | continue # skipping that particular branch (cf. previous comment) | ||||
if branch.target_type == TargetType.REVISION: | if branch.target_type == TargetType.REVISION: | ||||
revision_ids.append(branch.target) | revision_ids.append(branch.target) | ||||
revisions = storage.revision_get(revision_ids) | revisions = storage.revision_get(revision_ids) | ||||
for rev in revisions: | for rev in revisions: | ||||
assert rev is not None | assert rev is not None | ||||
metadata = rev.metadata | metadata = rev.metadata | ||||
assert metadata is not None | assert not metadata | ||||
raw = metadata["extrinsic"]["raw"] | |||||
assert "url" in raw | |||||
assert "integrity" in raw | |||||
def test_retrieve_sources(swh_storage, requests_mock_datadir): | def test_retrieve_sources(swh_storage, requests_mock_datadir): | ||||
j = parse_sources(retrieve_sources(sources_url)) | j = parse_sources(retrieve_sources(sources_url)) | ||||
assert "sources" in j.keys() | assert "sources" in j.keys() | ||||
assert len(j["sources"]) == 2 | assert len(j["sources"]) == 2 | ||||
▲ Show 20 Lines • Show All 356 Lines • ▼ Show 20 Lines | assert { | ||||
"origin_visit": 2, | "origin_visit": 2, | ||||
"release": 0, | "release": 0, | ||||
"revision": 2, | "revision": 2, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 2, | "snapshot": 2, | ||||
} == stats | } == stats | ||||
def test_resolve_revision_from_artifacts(swh_storage, requests_mock_datadir, datadir): | |||||
loader = NixGuixLoader(swh_storage, sources_url) | |||||
known_artifacts = { | |||||
"id1": {"extrinsic": {"raw": {"url": "url1", "integrity": "integrity1"}}}, | |||||
"id2": {"extrinsic": {"raw": {"url": "url2", "integrity": "integrity2"}}}, | |||||
} | |||||
p_info = NixGuixPackageInfo.from_metadata( | |||||
{"url": "url1", "integrity": "integrity1"} | |||||
) | |||||
assert loader.resolve_revision_from_artifacts(known_artifacts, p_info) == "id1" | |||||
p_info = NixGuixPackageInfo.from_metadata( | |||||
{"url": "url3", "integrity": "integrity3"} | |||||
) | |||||
assert loader.resolve_revision_from_artifacts(known_artifacts, p_info) is None | |||||
def test_evaluation_branch(swh_storage, requests_mock_datadir): | def test_evaluation_branch(swh_storage, requests_mock_datadir): | ||||
loader = NixGuixLoader(swh_storage, sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
res = loader.load() | res = loader.load() | ||||
assert res["status"] == "eventful" | assert res["status"] == "eventful" | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
swh_storage, | swh_storage, | ||||
sources_url, | sources_url, | ||||
▲ Show 20 Lines • Show All 98 Lines • ▼ Show 20 Lines | ): | ||||
expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" | expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa | assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
archive_loader.storage, gnu_url, status="full", type="tar" | archive_loader.storage, gnu_url, status="full", type="tar" | ||||
) | ) | ||||
gnu_snapshot: Snapshot = snapshot_get_all_branches( | |||||
archive_loader.storage, hash_to_bytes(expected_snapshot_id) | |||||
) | |||||
first_revision = gnu_snapshot.branches[f"releases/{release}".encode("utf-8")] | |||||
# 2. Then ingest with the nixguix loader which lists the same artifact within its | # 2. Then ingest with the nixguix loader which lists the same artifact within its | ||||
# sources.json | # sources.json | ||||
# ensure test setup is ok | # ensure test setup is ok | ||||
data_sources = os.path.join( | data_sources = os.path.join( | ||||
datadir, "https_nix-community.github.io", "nixpkgs-swh_sources_special.json" | datadir, "https_nix-community.github.io", "nixpkgs-swh_sources_special.json" | ||||
) | ) | ||||
all_sources = json.loads(open(data_sources).read()) | all_sources = json.loads(open(data_sources).read()) | ||||
Show All 11 Lines | ): | ||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "eventful" | assert actual_load_status2["status"] == "eventful" | ||||
assert_last_visit_matches(swh_storage, sources_url, status="full", type="nixguix") | assert_last_visit_matches(swh_storage, sources_url, status="full", type="nixguix") | ||||
snapshot_id = actual_load_status2["snapshot_id"] | snapshot_id = actual_load_status2["snapshot_id"] | ||||
snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id)) | snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id)) | ||||
assert snapshot | assert snapshot | ||||
# 3. Then ingest again with the nixguix loader, with a different snapshot | |||||
# and different source | |||||
# simulate a snapshot already seen with a revision with the wrong metadata structure | |||||
# This revision should be skipped, thus making the artifact being ingested again. | |||||
with patch( | |||||
"swh.loader.package.loader.PackageLoader.last_snapshot" | |||||
) as last_snapshot: | |||||
# mutate the snapshot to target a revision with the wrong metadata structure | |||||
# snapshot["branches"][artifact_url.encode("utf-8")] = first_revision | |||||
old_revision = swh_storage.revision_get([first_revision.target])[0] | |||||
# assert that revision is not in the right format | |||||
assert old_revision.metadata["extrinsic"]["raw"].get("integrity", {}) == {} | |||||
# mutate snapshot to create a clash | |||||
snapshot = attr.evolve( | |||||
snapshot, | |||||
branches={ | |||||
**snapshot.branches, | |||||
artifact_url.encode("utf-8"): SnapshotBranch( | |||||
target_type=TargetType.REVISION, | |||||
target=hash_to_bytes(old_revision.id), | |||||
), | |||||
}, | |||||
) | |||||
# modify snapshot to actually change revision metadata structure so we simulate | |||||
# a revision written by somebody else (structure different) | |||||
last_snapshot.return_value = snapshot | |||||
loader = NixGuixLoader(swh_storage, sources_url) | |||||
actual_load_status3 = loader.load() | |||||
assert last_snapshot.called | |||||
assert actual_load_status3["status"] == "eventful" | |||||
assert_last_visit_matches( | |||||
swh_storage, sources_url, status="full", type="nixguix" | |||||
) | |||||
new_snapshot_id = "32ff641e510aceefc3a6d0dcbf208b2854d2e965" | |||||
assert actual_load_status3["snapshot_id"] == new_snapshot_id | |||||
last_snapshot = snapshot_get_all_branches( | |||||
swh_storage, hash_to_bytes(new_snapshot_id) | |||||
) | |||||
new_revision_branch = last_snapshot.branches[artifact_url.encode("utf-8")] | |||||
assert new_revision_branch.target_type == TargetType.REVISION | |||||
new_revision = swh_storage.revision_get([new_revision_branch.target])[0] | |||||
# the new revision has the correct structure, so it got ingested alright by the | |||||
# new run | |||||
assert new_revision.metadata["extrinsic"]["raw"]["integrity"] is not None | |||||
actual_detections: List[Dict] = [] | |||||
for record in caplog.records: | |||||
logtext = record.getMessage() | |||||
if "Unexpected metadata revision structure detected:" in logtext: | |||||
actual_detections.append(record.args["context"]) | |||||
expected_detections = [ | |||||
{"reason": "'integrity'", "known_artifact": old_revision.metadata,}, | |||||
] | |||||
# less calls than there are sources listed in the sources.json; | |||||
# as some of them are skipped using the ExtID from a previous run | |||||
assert len(expected_detections) <= len(all_sources["sources"]) | |||||
assert actual_detections == expected_detections |