Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/tests/test_from_disk.py
Show First 20 Lines • Show All 171 Lines • ▼ Show 20 Lines | expected_stats = { | ||||
"release": 0, | "release": 0, | ||||
"revision": 58, | "revision": 58, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} | } | ||||
assert stats == expected_stats | assert stats == expected_stats | ||||
loader2 = HgLoaderFromDisk(swh_storage, url=repo_url) | loader2 = HgLoaderFromDisk(swh_storage, url=repo_url) | ||||
assert loader2.load() == {"status": "uneventful"} | assert loader2.load() == {"status": "uneventful"} # nothing new happened | ||||
stats2 = get_stats(loader2.storage) | stats2 = get_stats(loader2.storage) | ||||
expected_stats2 = expected_stats.copy() | expected_stats2 = expected_stats.copy() | ||||
expected_stats2["origin_visit"] = 2 # one new visit recorded | expected_stats2["origin_visit"] = 2 # one new visit recorded | ||||
assert stats2 == expected_stats2 | assert stats2 == expected_stats2 | ||||
visit_status = assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader2.storage, repo_url, status="full", type="hg", | loader2.storage, | ||||
) | repo_url, | ||||
assert visit_status.snapshot is None | status="full", | ||||
# FIXME: Already seen objects are filtered out, so no new snapshot. | type="hg", | ||||
# Current behavior but is it ok? | snapshot=expected_snapshot.id, | ||||
) # but we got a snapshot nonetheless | |||||
# This test has been adapted from the historical `HgBundle20Loader` tests | # This test has been adapted from the historical `HgBundle20Loader` tests | ||||
# to ensure compatibility of `HgLoaderFromDisk`. | # to ensure compatibility of `HgLoaderFromDisk`. | ||||
# Hashes have been produced by copy-pasting the result of the implementation | # Hashes have been produced by copy-pasting the result of the implementation | ||||
# to prevent regressions. | # to prevent regressions. | ||||
def test_loader_hg_new_visit_with_release(swh_storage, datadir, tmp_path): | def test_loader_hg_new_visit_with_release(swh_storage, datadir, tmp_path): | ||||
"""Eventful visit with release should yield 1 snapshot""" | """Eventful visit with release should yield 1 snapshot""" | ||||
▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | ): | ||||
new_storage.origin_visit_add([visit]) | new_storage.origin_visit_add([visit]) | ||||
statuses = old_storage.origin_visit_status_get(origin_url, visit.visit).results | statuses = old_storage.origin_visit_status_get(origin_url, visit.visit).results | ||||
new_storage.origin_visit_status_add(statuses) | new_storage.origin_visit_status_add(statuses) | ||||
new_storage.snapshot_add([snapshot]) | new_storage.snapshot_add([snapshot]) | ||||
return new_storage | return new_storage | ||||
@pytest.mark.parametrize("mechanism", ("extid", "same storage")) | |||||
def test_load_unchanged_repo_should_be_uneventful( | def test_load_unchanged_repo_should_be_uneventful( | ||||
swh_storage, datadir, tmp_path, mechanism | swh_storage, datadir, tmp_path, | ||||
): | ): | ||||
"""Checks the loader can find which revisions it already loaded, using ExtIDs.""" | """Checks the loader can find which revisions it already loaded, using ExtIDs.""" | ||||
archive_name = "hello" | archive_name = "hello" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
repo_path = repo_url.replace("file://", "") | repo_path = repo_url.replace("file://", "") | ||||
loader = HgLoaderFromDisk(swh_storage, repo_path) | loader = HgLoaderFromDisk(swh_storage, repo_path) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
assert get_stats(loader.storage) == { | assert get_stats(loader.storage) == { | ||||
"content": 3, | "content": 3, | ||||
"directory": 3, | "directory": 3, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 1, | "release": 1, | ||||
"revision": 3, | "revision": 3, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} | } | ||||
visit_status = assert_last_visit_matches( | |||||
old_storage = swh_storage | loader.storage, repo_path, type=RevisionType.MERCURIAL.value, status="full", | ||||
# Create a new storage, and only copy ExtIDs or head revisions to it. | |||||
# This should be enough for the loader to know revisions were already loaded | |||||
new_storage = _partial_copy_storage( | |||||
old_storage, repo_path, mechanism=mechanism, copy_revisions=True | |||||
) | ) | ||||
assert visit_status.snapshot is not None | |||||
# Create a new loader (to start with a clean slate, eg. remove the caches), | # Create a new loader (to start with a clean slate, eg. remove the caches), | ||||
# with the new, partial, storage | # with the new, partial, storage | ||||
loader = HgLoaderFromDisk(new_storage, repo_path) | loader2 = HgLoaderFromDisk(swh_storage, repo_path) | ||||
assert loader.load() == {"status": "uneventful"} | assert loader2.load() == {"status": "uneventful"} | ||||
if mechanism == "same storage": | |||||
# Should have all the objects | # Should have all the objects | ||||
assert get_stats(loader.storage) == { | assert get_stats(loader.storage) == { | ||||
"content": 3, | "content": 3, | ||||
"directory": 3, | "directory": 3, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 2, | "origin_visit": 2, | ||||
"release": 1, | "release": 1, | ||||
"revision": 3, | "revision": 3, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} | } | ||||
else: | visit_status2 = assert_last_visit_matches( | ||||
# Should have only the objects we directly inserted from the test, plus | loader2.storage, repo_path, type=RevisionType.MERCURIAL.value, status="full", | ||||
# a new visit | ) | ||||
assert get_stats(loader.storage) == { | assert visit_status2.snapshot == visit_status.snapshot | ||||
"content": 0, | |||||
"directory": 0, | |||||
"origin": 1, | |||||
"origin_visit": 2, | |||||
"release": 0, | |||||
"revision": 1, | |||||
"skipped_content": 0, | |||||
"snapshot": 1, | |||||
} | |||||
def test_closed_branch_incremental(swh_storage, datadir, tmp_path): | def test_closed_branch_incremental(swh_storage, datadir, tmp_path): | ||||
"""Test that a repository with a closed branch does not trip an incremental load""" | """Test that a repository with a closed branch does not trip an incremental load""" | ||||
archive_name = "example" | archive_name = "example" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
repo_path = repo_url.replace("file://", "") | repo_path = repo_url.replace("file://", "") | ||||
▲ Show 20 Lines • Show All 315 Lines • ▼ Show 20 Lines | def test_loader_hg_extid_filtering(swh_storage, datadir, tmp_path): | ||||
expected_stats2.update( | expected_stats2.update( | ||||
{"origin": 1 + 1, "origin_visit": 1 + 1,} | {"origin": 1 + 1, "origin_visit": 1 + 1,} | ||||
) | ) | ||||
assert stats == expected_stats2 | assert stats == expected_stats2 | ||||
visit_status2 = assert_last_visit_matches( | visit_status2 = assert_last_visit_matches( | ||||
loader.storage, fork_url, status="full", type="hg", | loader.storage, fork_url, status="full", type="hg", | ||||
) | ) | ||||
assert visit_status.snapshot is not None | assert visit_status.snapshot is not None | ||||
assert visit_status2.snapshot is None | assert visit_status2.snapshot == visit_status.snapshot | ||||
olasd: You can keep only the last line, but yeah. | |||||
Done. ardumont: The last 2 lines; I also want to ensure the snapshot is truthy. | |||||
# FIXME: Consistent behavior with filtering data out from already seen snapshot (on | |||||
# a given origin). But, the other fork origin has no snapshot at all. We should | |||||
# though, shouldn't we? Otherwise, that would mean fork could end up with no | |||||
# snapshot at all. |
You can keep only the last line, but yeah.