Page Menu | Home | Software Heritage

Jenkins > .tox.py3.lib.python3.7.site-packages.swh.loader.package.nixguix.tests.test_nixguix::test_load_nixguix_one_common_artifact_from_other_loader
Failed

TEST RESULT

Run At
Mar 29 2021, 2:47 PM
Details
swh_storage = <swh.storage.retry.RetryingProxyStorage object at 0x7fdfe9382e10> datadir = '/var/lib/jenkins/workspace/DLDBASE/tests-on-diff/.tox/py3/lib/python3.7/site-packages/swh/loader/package/nixguix/tests/data' requests_mock_datadir_visits = <requests_mock.mocker.Mocker object at 0x7fdfe93a0908> caplog = <_pytest.logging.LogCaptureFixture object at 0x7fdfe93a0a20> def test_load_nixguix_one_common_artifact_from_other_loader( swh_storage, datadir, requests_mock_datadir_visits, caplog ): """Misformatted revision should be caught and logged, then loading continues """ caplog.set_level(logging.ERROR, "swh.loader.package.nixguix.loader") # 1. first ingest with for example the archive loader gnu_url = "https://ftp.gnu.org/gnu/8sync/" release = "0.1.0" artifact_url = f"https://ftp.gnu.org/gnu/8sync/8sync-{release}.tar.gz" gnu_artifacts = [ { "time": 944729610, "url": artifact_url, "length": 221837, "filename": f"8sync-{release}.tar.gz", "version": release, } ] archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts) actual_load_status = archive_loader.load() expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" assert actual_load_status["status"] == "eventful" assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa assert_last_visit_matches( archive_loader.storage, gnu_url, status="full", type="tar" ) gnu_snapshot: Snapshot = snapshot_get_all_branches( archive_loader.storage, hash_to_bytes(expected_snapshot_id) ) first_revision = gnu_snapshot.branches[f"releases/{release}".encode("utf-8")] # 2. 
Then ingest with the nixguix loader which lists the same artifact within its # sources.json # ensure test setup is ok data_sources = os.path.join( datadir, "https_nix-community.github.io", "nixpkgs-swh_sources_special.json" ) all_sources = json.loads(open(data_sources).read()) found = False for source in all_sources["sources"]: if source["urls"][0] == artifact_url: found = True assert ( found is True ), f"test setup error: {artifact_url} must be in {data_sources}" # first visit with a snapshot, ok sources_url = "https://nix-community.github.io/nixpkgs-swh/sources_special.json" loader = NixGuixLoader(swh_storage, sources_url) actual_load_status2 = loader.load() assert actual_load_status2["status"] == "eventful" assert_last_visit_matches(swh_storage, sources_url, status="full", type="nixguix") snapshot_id = actual_load_status2["snapshot_id"] snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id)) assert snapshot # 3. Then ingest again with the nixguix loader, with a different snapshot # and different source # simulate a snapshot already seen with a revision with the wrong metadata structure # This revision should be skipped, thus making the artifact being ingested again. 
with patch( "swh.loader.package.loader.PackageLoader.last_snapshot" ) as last_snapshot: # mutate the snapshot to target a revision with the wrong metadata structure # snapshot["branches"][artifact_url.encode("utf-8")] = first_revision old_revision = swh_storage.revision_get([first_revision.target])[0] # assert that revision is not in the right format assert old_revision.metadata["extrinsic"]["raw"].get("integrity", {}) == {} # mutate snapshot to create a clash snapshot = attr.evolve( snapshot, branches={ **snapshot.branches, artifact_url.encode("utf-8"): SnapshotBranch( target_type=TargetType.REVISION, target=hash_to_bytes(old_revision.id), ), }, ) # modify snapshot to actually change revision metadata structure so we simulate # a revision written by somebody else (structure different) last_snapshot.return_value = snapshot loader = NixGuixLoader(swh_storage, sources_url) actual_load_status3 = loader.load() assert last_snapshot.called assert actual_load_status3["status"] == "eventful" assert_last_visit_matches( swh_storage, sources_url, status="full", type="nixguix" ) new_snapshot_id = "32ff641e510aceefc3a6d0dcbf208b2854d2e965" assert actual_load_status3["snapshot_id"] == new_snapshot_id last_snapshot = snapshot_get_all_branches( swh_storage, hash_to_bytes(new_snapshot_id) ) new_revision_branch = last_snapshot.branches[artifact_url.encode("utf-8")] assert new_revision_branch.target_type == TargetType.REVISION new_revision = swh_storage.revision_get([new_revision_branch.target])[0] # the new revision has the correct structure, so it got ingested alright by the # new run assert new_revision.metadata["extrinsic"]["raw"]["integrity"] is not None actual_detections: List[Dict] = [] for record in caplog.records: logtext = record.getMessage() if "Unexpected metadata revision structure detected:" in logtext: actual_detections.append(record.args["context"]) expected_detections = [ {"reason": "'integrity'", "known_artifact": old_revision.metadata,}, {"reason": "'integrity'", 
"known_artifact": old_revision.metadata,}, ] # as many calls as there are sources listed in the sources.json assert len(expected_detections) == len(all_sources["sources"]) > assert actual_detections == expected_detections E assert [{'known_arti...'integrity'"}] == [{'known_arti...'integrity'"}] E Right contains one more item: {'known_artifact': ImmutableDict({'extrinsic': {'raw': {'url': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'ti...43ce97e2c', 'sha256': '56859e0f8966931754bbbdf2f7229ea61db26091e7e7b42a73f9a996560a0cd0'}}]}), 'reason': "'integrity'"} E Full diff: E [ E {'known_artifact': ImmutableDict({'extrinsic': {'raw': {'url': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'time': 944729610, 'length': 221837, 'version': '0.1.0', 'filename': '8sync-0.1.0.tar.gz'}, 'when': '2021-03-29T12:47:13.611865+00:00', 'provider': 'https://ftp.gnu.org/gnu/8s... E E ...Full output truncated (5 lines hidden), use '-vv' to show .tox/py3/lib/python3.7/site-packages/swh/loader/package/nixguix/tests/test_nixguix.py:706: AssertionError