swh_storage = <swh.storage.retry.RetryingProxyStorage object at 0x7fdfe9382e10>
datadir = '/var/lib/jenkins/workspace/DLDBASE/tests-on-diff/.tox/py3/lib/python3.7/site-packages/swh/loader/package/nixguix/tests/data'
requests_mock_datadir_visits = <requests_mock.mocker.Mocker object at 0x7fdfe93a0908>
caplog = <_pytest.logging.LogCaptureFixture object at 0x7fdfe93a0a20>
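# Imports assumed by this excerpt (pytest does not show them); the module paths are
# best-effort guesses from the swh.loader / swh.model / swh.storage layout:
import json
import logging
import os
from typing import Dict, List
from unittest.mock import patch

import attr

from swh.loader.package.archive.loader import ArchiveLoader
from swh.loader.package.nixguix.loader import NixGuixLoader
from swh.loader.tests import assert_last_visit_matches
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Snapshot, SnapshotBranch, TargetType
from swh.storage.algos.snapshot import snapshot_get_all_branches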
def test_load_nixguix_one_common_artifact_from_other_loader(
swh_storage, datadir, requests_mock_datadir_visits, caplog
):
"""Misformatted revision should be caught and logged, then loading continues
"""
caplog.set_level(logging.ERROR, "swh.loader.package.nixguix.loader")
# 1. First ingest the artifact with, for example, the archive loader
gnu_url = "https://ftp.gnu.org/gnu/8sync/"
release = "0.1.0"
artifact_url = f"https://ftp.gnu.org/gnu/8sync/8sync-{release}.tar.gz"
gnu_artifacts = [
{
"time": 944729610,
"url": artifact_url,
"length": 221837,
"filename": f"8sync-{release}.tar.gz",
"version": release,
}
]
archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts)
actual_load_status = archive_loader.load()
expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5"
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa
assert_last_visit_matches(
archive_loader.storage, gnu_url, status="full", type="tar"
)
gnu_snapshot: Snapshot = snapshot_get_all_branches(
archive_loader.storage, hash_to_bytes(expected_snapshot_id)
)
first_revision = gnu_snapshot.branches[f"releases/{release}".encode("utf-8")]
# 2. Then ingest with the nixguix loader, which lists the same artifact in its
# sources.json
# Ensure the test setup is consistent: the artifact must be listed in sources.json
data_sources = os.path.join(
datadir, "https_nix-community.github.io", "nixpkgs-swh_sources_special.json"
)
with open(data_sources) as f:
    all_sources = json.load(f)
found = False
for source in all_sources["sources"]:
if source["urls"][0] == artifact_url:
found = True
assert (
found is True
), f"test setup error: {artifact_url} must be in {data_sources}"
# first visit with a snapshot, ok
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources_special.json"
loader = NixGuixLoader(swh_storage, sources_url)
actual_load_status2 = loader.load()
assert actual_load_status2["status"] == "eventful"
assert_last_visit_matches(swh_storage, sources_url, status="full", type="nixguix")
snapshot_id = actual_load_status2["snapshot_id"]
snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id))
assert snapshot
# 3. Then ingest again with the nixguix loader, with a different snapshot
# and different source
# simulate a snapshot already seen, with a revision having the wrong metadata
# structure. That revision should be skipped, so the artifact gets ingested again.
with patch(
"swh.loader.package.loader.PackageLoader.last_snapshot"
) as last_snapshot:
# mutate the snapshot to target a revision with the wrong metadata structure
# snapshot["branches"][artifact_url.encode("utf-8")] = first_revision
old_revision = swh_storage.revision_get([first_revision.target])[0]
# assert that revision is not in the right format
assert old_revision.metadata["extrinsic"]["raw"].get("integrity", {}) == {}
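# Note: as the failure diff below shows, the archive loader's raw extrinsic metadata
# only carries url/time/length/version/filename and no "integrity" key, which is why
# the .get("integrity", {}) default is hit here.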
# mutate snapshot to create a clash
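# Snapshot is an immutable attrs model, so attr.evolve builds a modified copy with
# the extra branch instead of mutating the snapshot in place.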
snapshot = attr.evolve(
snapshot,
branches={
**snapshot.branches,
artifact_url.encode("utf-8"): SnapshotBranch(
target_type=TargetType.REVISION,
target=hash_to_bytes(old_revision.id),
),
},
)
# return the modified snapshot from last_snapshot() so the loader sees a known
# revision whose metadata structure differs, as if written by another loader
last_snapshot.return_value = snapshot
loader = NixGuixLoader(swh_storage, sources_url)
actual_load_status3 = loader.load()
assert last_snapshot.called
assert actual_load_status3["status"] == "eventful"
assert_last_visit_matches(
swh_storage, sources_url, status="full", type="nixguix"
)
new_snapshot_id = "32ff641e510aceefc3a6d0dcbf208b2854d2e965"
assert actual_load_status3["snapshot_id"] == new_snapshot_id
last_snapshot = snapshot_get_all_branches(
swh_storage, hash_to_bytes(new_snapshot_id)
)
new_revision_branch = last_snapshot.branches[artifact_url.encode("utf-8")]
assert new_revision_branch.target_type == TargetType.REVISION
new_revision = swh_storage.revision_get([new_revision_branch.target])[0]
# the new revision has the correct structure, so it was properly ingested by the
# new run
assert new_revision.metadata["extrinsic"]["raw"]["integrity"] is not None
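# (assumption about the nixguix loader: its raw extrinsic metadata copies the source
# entry from sources.json, including its "integrity" checksum, which is what this
# assertion relies on)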
actual_detections: List[Dict] = []
for record in caplog.records:
logtext = record.getMessage()
if "Unexpected metadata revision structure detected:" in logtext:
actual_detections.append(record.args["context"])
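# A sketch (an assumption, not the loader's actual code) of the logging call shape
# that makes record.args["context"] usable above:
#   logger.error(
#       "Unexpected metadata revision structure detected: %(context)s",
#       {"context": {"reason": ..., "known_artifact": ...}},
#   )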
expected_detections = [
{"reason": "'integrity'", "known_artifact": old_revision.metadata,},
{"reason": "'integrity'", "known_artifact": old_revision.metadata,},
]
# as many calls as there are sources listed in the sources.json
assert len(expected_detections) == len(all_sources["sources"])
> assert actual_detections == expected_detections
E assert [{'known_arti...'integrity'"}] == [{'known_arti...'integrity'"}]
E Right contains one more item: {'known_artifact': ImmutableDict({'extrinsic': {'raw': {'url': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'ti...43ce97e2c', 'sha256': '56859e0f8966931754bbbdf2f7229ea61db26091e7e7b42a73f9a996560a0cd0'}}]}), 'reason': "'integrity'"}
E Full diff:
E [
E {'known_artifact': ImmutableDict({'extrinsic': {'raw': {'url': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'time': 944729610, 'length': 221837, 'version': '0.1.0', 'filename': '8sync-0.1.0.tar.gz'}, 'when': '2021-03-29T12:47:13.611865+00:00', 'provider': 'https://ftp.gnu.org/gnu/8s...
E
E ...Full output truncated (5 lines hidden), use '-vv' to show
.tox/py3/lib/python3.7/site-packages/swh/loader/package/nixguix/tests/test_nixguix.py:706: AssertionError
TEST RESULT
Run At: Mar 29 2021, 2:47 PM