Changeset View
Standalone View
swh/loader/mercurial/tests/test_from_disk.py
Show First 20 Lines • Show All 158 Lines • ▼ Show 20 Lines | assert_last_visit_matches( | ||||
repo_url, | repo_url, | ||||
status="full", | status="full", | ||||
type="hg", | type="hg", | ||||
snapshot=expected_snapshot.id, | snapshot=expected_snapshot.id, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats == { | expected_stats = { | ||||
"content": 2, | "content": 2, | ||||
"directory": 3, | "directory": 3, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 58, | "revision": 58, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} | } | ||||
assert stats == expected_stats | |||||
loader2 = HgLoaderFromDisk(swh_storage, url=repo_url) | |||||
assert loader2.load() == {"status": "uneventful"} | |||||
stats2 = get_stats(loader2.storage) | |||||
expected_stats2 = expected_stats.copy() | |||||
expected_stats2.update( | |||||
{"origin_visit": 1 + 1,} | |||||
) | |||||
assert stats2 == expected_stats2 | |||||
# FIXME: Already seen objects are filtered out, so no new snapshot. | |||||
# Current behavior but is it ok? | |||||
# This test has as been adapted from the historical `HgBundle20Loader` tests | # This test has as been adapted from the historical `HgBundle20Loader` tests | ||||
# to ensure compatibility of `HgLoaderFromDisk`. | # to ensure compatibility of `HgLoaderFromDisk`. | ||||
# Hashes as been produced by copy pasting the result of the implementation | # Hashes as been produced by copy pasting the result of the implementation | ||||
# to prevent regressions. | # to prevent regressions. | ||||
def test_loader_hg_new_visit_with_release(swh_storage, datadir, tmp_path): | def test_loader_hg_new_visit_with_release(swh_storage, datadir, tmp_path): | ||||
"""Eventful visit with release should yield 1 snapshot""" | """Eventful visit with release should yield 1 snapshot""" | ||||
▲ Show 20 Lines • Show All 479 Lines • ▼ Show 20 Lines | def test_load_new_extid_should_be_eventful(swh_storage, datadir, tmp_path): | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
with unittest.mock.patch("swh.loader.mercurial.from_disk.EXTID_VERSION", 10000): | with unittest.mock.patch("swh.loader.mercurial.from_disk.EXTID_VERSION", 10000): | ||||
loader = HgLoaderFromDisk(swh_storage, repo_path) | loader = HgLoaderFromDisk(swh_storage, repo_path) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
loader = HgLoaderFromDisk(swh_storage, repo_path) | loader = HgLoaderFromDisk(swh_storage, repo_path) | ||||
assert loader.load() == {"status": "uneventful"} | assert loader.load() == {"status": "uneventful"} | ||||
def test_loader_hg_extid_filtering(swh_storage, datadir, tmp_path): | |||||
"""The first visit of a fork should filter already seen revisions (through extids) | |||||
""" | |||||
archive_name = "the-sandbox" | |||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | |||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | |||||
loader = HgLoaderFromDisk(swh_storage, url=repo_url) | |||||
assert loader.load() == {"status": "eventful"} | |||||
stats = get_stats(loader.storage) | |||||
expected_stats = { | |||||
"content": 2, | |||||
"directory": 3, | |||||
"origin": 1, | |||||
"origin_visit": 1, | |||||
"release": 0, | |||||
"revision": 58, | |||||
"skipped_content": 0, | |||||
"snapshot": 1, | |||||
} | |||||
assert stats == expected_stats | |||||
visit_status = assert_last_visit_matches( | |||||
loader.storage, repo_url, status="full", type="hg", | |||||
) | |||||
# Make a fork of the first repository we ingested | |||||
fork_url = prepare_repository_from_archive( | |||||
archive_path, "the-sandbox-reloaded", tmp_path | |||||
) | |||||
loader2 = HgLoaderFromDisk( | |||||
swh_storage, url=fork_url, directory=str(tmp_path / archive_name) | |||||
) | |||||
assert loader2.load() == {"status": "uneventful"} | |||||
stats = get_stats(loader.storage) | |||||
expected_stats2 = expected_stats.copy() | |||||
expected_stats2.update( | |||||
{"origin": 1 + 1, "origin_visit": 1 + 1,} | |||||
) | |||||
assert stats == expected_stats2 | |||||
visit_status2 = assert_last_visit_matches( | |||||
loader.storage, fork_url, status="full", type="hg", | |||||
) | |||||
assert visit_status.snapshot is not None | |||||
assert visit_status2.snapshot is None | |||||
ardumont: That does not feel right here.
Then again, that's the current behavior for the same origin. | |||||
Done Inline Actionsardumont: @olasd @vlorentz Note this though ^ and the comment below. | |||||
Done Inline ActionsA full visit shouldn't be able to have snapshot = None, so, yeah, there's something fishy going on. olasd: A full visit shouldn't be able to have snapshot = None, so, yeah, there's something fishy going… | |||||
Done Inline ActionsIt's the current behavior of the implementation though. That's one of the reason i added some checks in a previous test I'll double check that adaptations in the current master (line 189 onwards in the master tomorrow). ardumont: It's the current behavior of the implementation though.
That's one of the reason i added some… | |||||
Done Inline ActionsWhat i meant about current behavior is that when a revision is currently detected as seen. While it can be "somewhat" ok for the same origin, it becomes a "nono" for a fork since, for a given order, we won't be able to make a snapshot for forks. The thing is, if we adapt the behavior to actually have a snapshot with already seen revisions (which would be closer to the truth), that will change the behavior for both scenario (same origin and forks). To do this is something that would be clearer to me and more to the point. What do you think? [1] in another diff to make things clearer [2] That begs the question of "do we keep the old implementations? (because *sigh* at the idea of adapting this code as well...) ardumont: What i meant about current behavior is that when a revision is currently detected as seen.
It's… | |||||
Done Inline Actionsardumont: See D6262 to explicit the wrong behavior.
And T3571 to actually track and fix it.
I'm unsure… | |||||
Done Inline Actions
The reverse order may be easier: fixing the snapshot logic in the "simpler" case when extids are only involved for existing heads, then introducing the extid matching logic for arbitrary revisions. Either way, it doesn't matter much, as I assume both will get deployed at the same time. olasd: > I think landing this and then adapt correctly the snapshot handling after this makes sense. | |||||
Done Inline Actions
yeah, i'm in that mind logic as well. ardumont: > Either way, it doesn't matter much, as I assume both will get deployed at the same time. | |||||
# FIXME: Consistent behavior with filtering data out from already seen snapshot (on | |||||
# a given origin). But, the other fork origin has no snapshot at all. We should | |||||
# though, shouldn't we? Otherwise, that would mean fork could end up with no | |||||
# snapshot at all. |
That does not feel right here.
Then again, that's the current behavior for the same origin.