diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py --- a/swh/loader/package/archive/tests/test_archive.py +++ b/swh/loader/package/archive/tests/test_archive.py @@ -153,15 +153,6 @@ "snapshot": 1, } == stats - expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) - assert list(swh_storage.content_missing_per_sha1(expected_contents)) == [] - - expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit) - assert list(swh_storage.directory_missing(expected_dirs)) == [] - - expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit) - assert list(swh_storage.revision_missing(expected_revs)) == [] - expected_snapshot = Snapshot( id=expected_snapshot_first_visit_id, branches={ @@ -170,13 +161,22 @@ ), b"releases/0.1.0": SnapshotBranch( target_type=TargetType.REVISION, - target=hash_to_bytes("44183488c0774ce3c957fa19ba695cf18a4a42b3"), + target=hash_to_bytes(list(_expected_new_revisions_first_visit)[0]), ), }, ) check_snapshot(expected_snapshot, swh_storage) + expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) + assert list(swh_storage.content_missing_per_sha1(expected_contents)) == [] + + expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit) + assert list(swh_storage.directory_missing(expected_dirs)) == [] + + expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit) + assert list(swh_storage.revision_missing(expected_revs)) == [] + def test_archive_2_visits_without_change(swh_storage, requests_mock_datadir): """With no prior visit, load a gnu project ends up with 1 snapshot diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py --- a/swh/loader/package/cran/tests/test_cran.py +++ b/swh/loader/package/cran/tests/test_cran.py @@ -182,9 +182,11 @@ "snapshot_id": SNAPSHOT.id.hex(), } - check_snapshot(SNAPSHOT, swh_storage) + assert_last_visit_matches( + swh_storage, origin_url, status="full", type="cran", snapshot=SNAPSHOT.id + ) - assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran") + check_snapshot(SNAPSHOT, swh_storage) visit_stats = get_stats(swh_storage) assert { @@ -230,7 +232,9 @@ check_snapshot(SNAPSHOT, swh_storage) - assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran") + assert_last_visit_matches( + swh_storage, origin_url, status="full", type="cran", snapshot=SNAPSHOT.id + ) visit_stats = get_stats(swh_storage) assert { @@ -252,7 +256,13 @@ "snapshot_id": expected_snapshot_id, } - assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran") + assert_last_visit_matches( + swh_storage, + origin_url, + status="full", + type="cran", + snapshot=hash_to_bytes(expected_snapshot_id), + ) visit_stats2 = get_stats(swh_storage) visit_stats["origin_visit"] += 1 @@ -360,5 +370,5 @@ } == visit_stats assert_last_visit_matches( - swh_storage, origin_url, status="partial", type="cran" + swh_storage, origin_url, status="partial", type="cran", snapshot=SNAPSHOT.id ) diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -116,19 +116,13 @@ "snapshot_id": expected_snapshot_id, } - assert_last_visit_matches(swh_storage, URL, status="full", type="deb") - - stats = get_stats(swh_storage) - assert { - "content": 42, - "directory": 2, - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": 1, # all artifacts under 1 revision - "skipped_content": 0, - "snapshot": 1, - } == stats + assert_last_visit_matches( + swh_storage, + URL, + status="full", + type="deb", + snapshot=hash_to_bytes(expected_snapshot_id), + ) expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), @@ -142,6 +136,18 @@ check_snapshot(expected_snapshot, swh_storage) + stats = get_stats(swh_storage) + assert { + "content": 42, + "directory": 2, + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 1, # all artifacts under 1 revision + "skipped_content": 0, + "snapshot": 1, + } == stats + def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadir): """With no prior visit, load a debian project ends up with 1 snapshot @@ -162,19 +168,13 @@ "snapshot_id": expected_snapshot_id, } - assert_last_visit_matches(swh_storage, URL, status="full", type="deb") - - stats = get_stats(swh_storage) - assert { - "content": 42, - "directory": 2, - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": 1, # all artifacts under 1 revision - "skipped_content": 0, - "snapshot": 1, - } == stats + assert_last_visit_matches( + swh_storage, + URL, + status="full", + type="deb", + snapshot=hash_to_bytes(expected_snapshot_id), + ) expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), @@ -188,10 +188,28 @@ check_snapshot(expected_snapshot, swh_storage) + stats = get_stats(swh_storage) + assert { + "content": 42, + "directory": 2, + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 1, # all artifacts under 1 revision + "skipped_content": 0, + "snapshot": 1, + } == stats + # No change in between load actual_load_status2 = loader.load() assert actual_load_status2["status"] == "uneventful" - assert_last_visit_matches(swh_storage, URL, status="full", type="deb") + assert_last_visit_matches( + swh_storage, + URL, + status="full", + type="deb", + snapshot=hash_to_bytes(expected_snapshot_id), + ) stats2 = get_stats(swh_storage) assert { @@ -406,7 +424,13 @@ "snapshot_id": expected_snapshot_id, } - assert_last_visit_matches(swh_storage, URL, status="full", type="deb") + assert_last_visit_matches( + swh_storage, + URL, + status="full", + type="deb", + snapshot=hash_to_bytes(expected_snapshot_id), + ) expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -177,19 +177,13 @@ "snapshot_id": expected_snapshot_id, } - assert_last_visit_matches(loader.storage, url, status="full", type="deposit") - - stats = get_stats(loader.storage) - assert { - "content": 303, - "directory": 12, - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": 1, - "skipped_content": 0, - "snapshot": 1, - } == stats + assert_last_visit_matches( + loader.storage, + url, + status="full", + type="deposit", + snapshot=hash_to_bytes(expected_snapshot_id), + ) revision_id_hex = "637318680351f5d78856d13264faebbd91efe9bb" revision_id = hash_to_bytes(revision_id_hex) @@ -287,6 +281,18 @@ assert body == expected_body + stats = get_stats(loader.storage) + assert { + "content": 303, + "directory": 12, + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 1, + "skipped_content": 0, + "snapshot": 1, + } == stats + def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir): """Field dates should be se appropriately @@ -306,7 +312,13 @@ "status": "eventful", "snapshot_id": expected_snapshot_id, } - assert_last_visit_matches(loader.storage, url, status="full", type="deposit") + assert_last_visit_matches( + loader.storage, + url, + status="full", + type="deposit", + snapshot=hash_to_bytes(expected_snapshot_id), + ) revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c" expected_snapshot = Snapshot( @@ -479,4 +491,10 @@ "status": "eventful", "snapshot_id": expected_snapshot_id, } - assert_last_visit_matches(loader.storage, url, status="full", type="deposit") + assert_last_visit_matches( + loader.storage, + url, + status="full", + type="deposit", + snapshot=hash_to_bytes(expected_snapshot_id), + ) diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -536,15 +536,19 @@ "snapshot_id": SNAPSHOT1.id.hex(), } - check_snapshot(SNAPSHOT1, storage=swh_storage) - - assert len(mock_download.mock_calls) == 2 - # The visit is partial because some artifact downloads failed assert_last_visit_matches( - swh_storage, sources_url, status="partial", type="nixguix" + swh_storage, + sources_url, + status="partial", + type="nixguix", + snapshot=SNAPSHOT1.id, ) + check_snapshot(SNAPSHOT1, storage=swh_storage) + + assert len(mock_download.mock_calls) == 2 + def test_load_nixguix_one_common_artifact_from_other_loader( swh_storage, datadir, requests_mock_datadir_visits, caplog @@ -574,7 +578,11 @@ assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa assert_last_visit_matches( - archive_loader.storage, gnu_url, status="full", type="tar" + archive_loader.storage, + gnu_url, + status="full", + type="tar", + snapshot=hash_to_bytes(expected_snapshot_id), ) # 2. Then ingest with the nixguix loader which lists the same artifact within its @@ -599,8 +607,15 @@ actual_load_status2 = loader.load() assert actual_load_status2["status"] == "eventful" - assert_last_visit_matches(swh_storage, sources_url, status="full", type="nixguix") - snapshot_id = actual_load_status2["snapshot_id"] + + assert_last_visit_matches( + swh_storage, + sources_url, + status="full", + type="nixguix", + snapshot=hash_to_bytes(snapshot_id), + ) + snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id)) assert snapshot diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py --- a/swh/loader/package/npm/tests/test_npm.py +++ b/swh/loader/package/npm/tests/test_npm.py @@ -318,29 +318,6 @@ swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id ) - stats = get_stats(swh_storage) - - assert { - "content": len(_expected_new_contents_first_visit), - "directory": len(_expected_new_directories_first_visit), - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": len(_expected_new_revisions_first_visit), - "skipped_content": 0, - "snapshot": 1, - } == stats - - contents = swh_storage.content_get(_expected_new_contents_first_visit) - count = sum(0 if content is None else 1 for content in contents) - assert count == len(_expected_new_contents_first_visit) - - assert ( - list(swh_storage.directory_missing(_expected_new_directories_first_visit)) == [] - ) - - assert list(swh_storage.revision_missing(_expected_new_revisions_first_visit)) == [] - versions = [ ("0.0.2", "d8a1c7474d2956ac598a19f0f27d52f7015f117e"), ("0.0.3", "5f9eb78af37ffd12949f235e86fac04898f9f72a"), @@ -364,6 +341,16 @@ ) check_snapshot(expected_snapshot, swh_storage) + contents = swh_storage.content_get(_expected_new_contents_first_visit) + count = sum(0 if content is None else 1 for content in contents) + assert count == len(_expected_new_contents_first_visit) + + assert ( + list(swh_storage.directory_missing(_expected_new_directories_first_visit)) == [] + ) + + assert list(swh_storage.revision_missing(_expected_new_revisions_first_visit)) == [] + metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://npmjs.com/", ) @@ -397,6 +384,19 @@ directory_swhid, metadata_authority, ) == PagedResult(next_page_token=None, results=expected_metadata,) + stats = get_stats(swh_storage) + + assert { + "content": len(_expected_new_contents_first_visit), + "directory": len(_expected_new_directories_first_visit), + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": len(_expected_new_revisions_first_visit), + "skipped_content": 0, + "snapshot": 1, + } == stats + def test_npm_loader_incremental_visit(swh_storage, requests_mock_datadir_visits): package = "org" @@ -475,19 +475,6 @@ swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id ) - stats = get_stats(swh_storage) - - assert { # 1 new releases artifacts - "content": 534, - "directory": 153, - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": 2, - "skipped_content": 0, - "snapshot": 1, - } == stats - expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ @@ -506,6 +493,19 @@ ) check_snapshot(expected_snapshot, swh_storage) + stats = get_stats(swh_storage) + + assert { # 1 new releases artifacts + "content": 534, + "directory": 153, + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 2, + "skipped_content": 0, + "snapshot": 1, + } == stats + def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir): """Skip artifact with no intrinsic metadata during ingestion @@ -525,12 +525,12 @@ "snapshot_id": expected_snapshot.id.hex(), } - check_snapshot(expected_snapshot, swh_storage) - assert_last_visit_matches( swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id ) + check_snapshot(expected_snapshot, swh_storage) + def test_npm_artifact_with_no_upload_time(swh_storage, requests_mock_datadir): """With no time upload, artifact is skipped @@ -550,12 +550,12 @@ "snapshot_id": expected_snapshot.id.hex(), } - check_snapshot(expected_snapshot, swh_storage) - assert_last_visit_matches( swh_storage, url, status="partial", type="npm", snapshot=expected_snapshot.id ) + check_snapshot(expected_snapshot, swh_storage) + def test_npm_artifact_use_mtime_if_no_time(swh_storage, requests_mock_datadir): """With no time upload, artifact is skipped @@ -586,12 +586,13 @@ ), }, ) - check_snapshot(expected_snapshot, swh_storage) assert_last_visit_matches( swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id ) + check_snapshot(expected_snapshot, swh_storage) + def test_npm_no_artifact(swh_storage, requests_mock_datadir): """If no artifacts at all is found for origin, the visit fails completely diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py --- a/swh/loader/package/opam/tests/test_opam.py +++ b/swh/loader/package/opam/tests/test_opam.py @@ -86,12 +86,13 @@ ), }, ) - check_snapshot(expected_snapshot, swh_storage) assert_last_visit_matches( swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id ) + check_snapshot(expected_snapshot, swh_storage) + stats = get_stats(swh_storage) assert { @@ -154,12 +155,12 @@ }, ) - check_snapshot(expected_snapshot, swh_storage) - assert_last_visit_matches( swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id ) + check_snapshot(expected_snapshot, swh_storage) + def test_opam_revision(tmpdir, requests_mock_datadir, swh_storage, datadir): diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -218,6 +218,12 @@ assert actual_load_status["status"] == "uneventful" assert actual_load_status["snapshot_id"] is not None + empty_snapshot = Snapshot(branches={}) + + assert_last_visit_matches( + swh_storage, url, status="partial", type="pypi", snapshot=empty_snapshot.id + ) + stats = get_stats(swh_storage) assert { "content": 0, @@ -230,8 +236,6 @@ "snapshot": 1, } == stats - assert_last_visit_matches(swh_storage, url, status="partial", type="pypi") - def test_pypi_fail__load_snapshot(swh_storage, requests_mock_datadir): """problem during loading: {visit: failed, status: failed, no snapshot} @@ -247,6 +251,8 @@ actual_load_status = loader.load() assert actual_load_status == {"status": "failed"} + assert_last_visit_matches(swh_storage, url, status="failed", type="pypi") + stats = get_stats(loader.storage) assert { @@ -260,8 +266,6 @@ "snapshot": 0, } == stats - assert_last_visit_matches(swh_storage, url, status="failed", type="pypi") - # problem during loading: # {visit: partial, status: uneventful, no snapshot} @@ -278,6 +282,8 @@ actual_load_status = loader.load() assert actual_load_status == {"status": "failed"} + assert_last_visit_matches(swh_storage, url, status="failed", type="pypi") + stats = get_stats(swh_storage) assert { @@ -291,8 +297,6 @@ "snapshot": 0, } == stats - assert_last_visit_matches(swh_storage, url, status="failed", type="pypi") - # problem during loading: failure early enough in between swh contents... # some contents (contents, directories, etc...) have been written in storage @@ -375,18 +379,23 @@ "snapshot_id": expected_snapshot_id.hex(), } - stats = get_stats(swh_storage) + assert_last_visit_matches( + swh_storage, url, status="partial", type="pypi", snapshot=expected_snapshot_id, + ) - assert { - "content": 3, - "directory": 2, - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": 1, - "skipped_content": 0, - "snapshot": 1, - } == stats + expected_snapshot = Snapshot( + id=hash_to_bytes(expected_snapshot_id), + branches={ + b"releases/1.2.0": SnapshotBranch( + target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), + target_type=TargetType.REVISION, + ), + b"HEAD": SnapshotBranch( + target=b"releases/1.2.0", target_type=TargetType.ALIAS, + ), + }, + ) + check_snapshot(expected_snapshot, storage=swh_storage) expected_contents = map( hash_to_bytes, @@ -417,23 +426,18 @@ } assert list(swh_storage.revision_missing(expected_revs)) == [] - expected_snapshot = Snapshot( - id=hash_to_bytes(expected_snapshot_id), - branches={ - b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), - target_type=TargetType.REVISION, - ), - b"HEAD": SnapshotBranch( - target=b"releases/1.2.0", target_type=TargetType.ALIAS, - ), - }, - ) - check_snapshot(expected_snapshot, storage=swh_storage) + stats = get_stats(swh_storage) - assert_last_visit_matches( - swh_storage, url, status="partial", type="pypi", snapshot=expected_snapshot_id, - ) + assert { + "content": 3, + "directory": 2, + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 1, + "skipped_content": 0, + "snapshot": 1, + } == stats def test_pypi_visit_with_1_release_artifact(swh_storage, requests_mock_datadir): @@ -450,6 +454,28 @@ "snapshot_id": expected_snapshot_id.hex(), } + assert_last_visit_matches( + swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id + ) + + expected_snapshot = Snapshot( + id=expected_snapshot_id, + branches={ + b"releases/1.1.0": SnapshotBranch( + target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), + target_type=TargetType.REVISION, + ), + b"releases/1.2.0": SnapshotBranch( + target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), + target_type=TargetType.REVISION, + ), + b"HEAD": SnapshotBranch( + target=b"releases/1.2.0", target_type=TargetType.ALIAS, + ), + }, + ) + check_snapshot(expected_snapshot, swh_storage) + stats = get_stats(swh_storage) assert { "content": 6, @@ -499,28 +525,6 @@ } assert list(swh_storage.revision_missing(expected_revs)) == [] - expected_snapshot = Snapshot( - id=expected_snapshot_id, - branches={ - b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), - target_type=TargetType.REVISION, - ), - b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), - target_type=TargetType.REVISION, - ), - b"HEAD": SnapshotBranch( - target=b"releases/1.2.0", target_type=TargetType.ALIAS, - ), - }, - ) - check_snapshot(expected_snapshot, swh_storage) - - assert_last_visit_matches( - swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id - ) - def test_pypi_multiple_visits_with_no_change(swh_storage, requests_mock_datadir): """Multiple visits with no changes results in 1 same snapshot @@ -539,19 +543,6 @@ swh_storage, url, status="full", type="pypi", snapshot=snapshot_id ) - stats = get_stats(swh_storage) - - assert { - "content": 6, - "directory": 4, - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": 2, - "skipped_content": 0, - "snapshot": 1, - } == stats - expected_snapshot = Snapshot( id=snapshot_id, branches={ @@ -570,6 +561,19 @@ ) check_snapshot(expected_snapshot, swh_storage) + stats = get_stats(swh_storage) + + assert { + "content": 6, + "directory": 4, + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 2, + "skipped_content": 0, + "snapshot": 1, + } == stats + actual_load_status2 = loader.load() assert actual_load_status2 == { "status": "uneventful", @@ -637,6 +641,33 @@ swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id2 ) + expected_snapshot = Snapshot( + id=expected_snapshot_id2, + branches={ + b"releases/1.1.0": SnapshotBranch( + target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), + target_type=TargetType.REVISION, + ), + b"releases/1.2.0": SnapshotBranch( + target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), + target_type=TargetType.REVISION, + ), + b"releases/1.3.0": SnapshotBranch( + target=hash_to_bytes("51247143b01445c9348afa9edfae31bf7c5d86b1"), + target_type=TargetType.REVISION, + ), + b"HEAD": SnapshotBranch( + target=b"releases/1.3.0", target_type=TargetType.ALIAS, + ), + }, + ) + + assert_last_visit_matches( + swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id + ) + + check_snapshot(expected_snapshot, swh_storage) + assert { "content": 6 + 1, # 1 more content "directory": 4 + 2, # 2 more directories @@ -692,33 +723,6 @@ assert list(swh_storage.revision_missing(expected_revs)) == [] - expected_snapshot = Snapshot( - id=expected_snapshot_id2, - branches={ - b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"), - target_type=TargetType.REVISION, - ), - b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"), - target_type=TargetType.REVISION, - ), - b"releases/1.3.0": SnapshotBranch( - target=hash_to_bytes("51247143b01445c9348afa9edfae31bf7c5d86b1"), - target_type=TargetType.REVISION, - ), - b"HEAD": SnapshotBranch( - target=b"releases/1.3.0", target_type=TargetType.ALIAS, - ), - }, - ) - - check_snapshot(expected_snapshot, swh_storage) - - assert_last_visit_matches( - swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id - ) - urls = [ m.url for m in requests_mock_datadir_visits.request_history @@ -753,6 +757,10 @@ "snapshot_id": expected_snapshot_id.hex(), } + assert_last_visit_matches( + swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id + ) + expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ @@ -768,10 +776,6 @@ ) check_snapshot(expected_snapshot, swh_storage) - assert_last_visit_matches( - swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id - ) - def test_pypi_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir): """Skip artifact with no intrinsic metadata during ingestion @@ -789,12 +793,13 @@ # no branch as one artifact without any intrinsic metadata expected_snapshot = Snapshot(id=expected_snapshot_id, branches={}) - check_snapshot(expected_snapshot, swh_storage) assert_last_visit_matches( swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id ) + check_snapshot(expected_snapshot, swh_storage) + def test_pypi_origin_not_found(swh_storage, requests_mock_datadir): url = "https://pypi.org/project/unknown" diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py --- a/swh/loader/tests/__init__.py +++ b/swh/loader/tests/__init__.py @@ -115,7 +115,7 @@ def check_snapshot( - snapshot: Snapshot, + expected_snapshot: Snapshot, storage: StorageInterface, allowed_empty: Iterable[Tuple[TargetType, bytes]] = [], ) -> Snapshot: @@ -124,7 +124,7 @@ - each object reference up to the revision/release targets exists Args: - snapshot: full snapshot to check for existence and consistency + expected_snapshot: full snapshot to check for existence and consistency storage: storage to lookup information into allowed_empty: Iterable of branch we allow to be empty (some edge case loaders allows this case to happen, nixguix for example allows the branch evaluation" @@ -136,12 +136,14 @@ needed. """ - if not isinstance(snapshot, Snapshot): - raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}") + if not isinstance(expected_snapshot, Snapshot): + raise AssertionError( + f"argument 'expected_snapshot' must be a snapshot: {expected_snapshot!r}" + ) - expected_snapshot = snapshot_get_all_branches(storage, snapshot.id) - if expected_snapshot is None: - raise AssertionError(f"Snapshot {snapshot.id.hex()} is not found") + snapshot = snapshot_get_all_branches(storage, expected_snapshot.id) + if snapshot is None: + raise AssertionError(f"Snapshot {expected_snapshot.id.hex()} is not found") assert snapshot == expected_snapshot diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py --- a/swh/loader/tests/test_init.py +++ b/swh/loader/tests/test_init.py @@ -400,7 +400,9 @@ ) # 0. not a Snapshot object, raise! - with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"): + with pytest.raises( + AssertionError, match="argument 'expected_snapshot' must be a snapshot" + ): check_snapshot(ORIGIN_VISIT, swh_storage) # 1. snapshot id is correct but branches mismatched