Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/tests/test_from_disk.py
- This file was copied from swh/loader/mercurial/tests/test_loader.py.
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
marmoute: This needs a module docstring. | |||||
Done Inline Actions marmoute: Where does this test come from? Is this a copy of the tests for the older loader? If so, what are the differences? | |||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | |||||
import logging | |||||
import os | import os | ||||
import time | |||||
import hglib | |||||
import pytest | |||||
from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
assert_last_visit_matches, | assert_last_visit_matches, | ||||
check_snapshot, | check_snapshot, | ||||
get_stats, | get_stats, | ||||
prepare_repository_from_archive, | prepare_repository_from_archive, | ||||
) | ) | ||||
from swh.model.from_disk import Content | |||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType | from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType | ||||
Done Inline Actionsplease document the testcase marmoute: please document the testcase | |||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from ..loader import CloneTimeoutError, HgArchiveBundle20Loader, HgBundle20Loader | from ..from_disk import HgDirectory, HgLoaderFromDisk | ||||
from .loader_checker import ExpectedSwhids, LoaderChecker | from .loader_checker import ExpectedSwhids, LoaderChecker | ||||
def test_hg_directory_creates_missing_directories():
    """Assigning a content at a nested path must implicitly create every
    intermediate directory of that path."""
    directory = HgDirectory()
    content = Content()
    directory[b"path/to/some/content"] = content
    # NOTE(review): this test used to only check that no exception was raised;
    # also assert that the content is reachable at the path just written.
    assert directory[b"path/to/some/content"] is content
# Those tests assert expectations on repository loading | |||||
Done Inline Actions marmoute: where does this hash come from? | |||||
# by reading expected values from associated json files | |||||
# produced by the `swh-hg-identify` command line utility. | |||||
# | |||||
# It has more granularity than historical tests. | |||||
# Assertions will tell if the error comes from the directories | |||||
# revisions or release rather than only checking the snapshot. | |||||
# | |||||
# With more work it should even be possible to know which part
# of an object is faulty. | |||||
def test_examples(swh_config, datadir, tmp_path):
    """Load each sample repository and compare the resulting SWHIDs with the
    reference values recorded in its companion ``<archive>.json`` file
    (produced by the ``swh-hg-identify`` command line utility)."""
    for name in ("hello", "transplant", "the-sandbox", "example"):
        tarball = os.path.join(datadir, f"{name}.tgz")
        expected = ExpectedSwhids.load(os.path.join(datadir, f"{name}.json"))
        origin_url = prepare_repository_from_archive(tarball, name, tmp_path)

        checker = LoaderChecker(loader=HgLoaderFromDisk(origin_url), expected=expected)
        checker.check()
# This test has been adapted from the historical `HgBundle20Loader` tests
# to ensure compatibility of `HgLoaderFromDisk`.
# Hashes have been produced by copy-pasting the result of the implementation
# to prevent regressions.
def test_loader_hg_new_visit_no_release(swh_config, datadir, tmp_path): | def test_loader_hg_new_visit_no_release(swh_config, datadir, tmp_path): | ||||
"""Eventful visit should yield 1 snapshot""" | """Eventful visit should yield 1 snapshot""" | ||||
archive_name = "the-sandbox" | archive_name = "the-sandbox" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = HgBundle20Loader(repo_url) | loader = HgLoaderFromDisk(url=repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
tip_revision_develop = "a9c4534552df370f43f0ef97146f393ef2f2a08c" | tip_revision_develop = "a9c4534552df370f43f0ef97146f393ef2f2a08c" | ||||
tip_revision_default = "70e750bb046101fdced06f428e73fee471509c56" | tip_revision_default = "70e750bb046101fdced06f428e73fee471509c56" | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes("3b8fe58e467deb7597b12a5fd3b2c096b8c02028"), | id=hash_to_bytes("3b8fe58e467deb7597b12a5fd3b2c096b8c02028"), | ||||
branches={ | branches={ | ||||
Show All 25 Lines | assert stats == { | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 58, | "revision": 58, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} | } | ||||
# Ensure archive loader yields the same snapshot | |||||
loader2 = HgArchiveBundle20Loader( | |||||
url=archive_path, | |||||
archive_path=archive_path, | |||||
visit_date="2016-05-03 15:16:32+00", | |||||
) | |||||
actual_load_status = loader2.load() | |||||
assert actual_load_status == {"status": "eventful"} | |||||
stats2 = get_stats(loader2.storage) | |||||
expected_stats = copy.deepcopy(stats) | |||||
expected_stats["origin"] += 1 | |||||
expected_stats["origin_visit"] += 1 | |||||
assert stats2 == expected_stats | |||||
# That visit yields the same snapshot | |||||
assert_last_visit_matches( | |||||
loader2.storage, | |||||
archive_path, | |||||
status="full", | |||||
type="hg", | |||||
snapshot=expected_snapshot.id, | |||||
) | |||||
# This test has been adapted from the historical `HgBundle20Loader` tests
# to ensure compatibility of `HgLoaderFromDisk`.
# Hashes have been produced by copy-pasting the result of the implementation
# to prevent regressions.
def test_loader_hg_new_visit_with_release(swh_config, datadir, tmp_path): | def test_loader_hg_new_visit_with_release(swh_config, datadir, tmp_path): | ||||
"""Eventful visit with release should yield 1 snapshot""" | """Eventful visit with release should yield 1 snapshot""" | ||||
archive_name = "hello" | archive_name = "hello" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
Done Inline ActionsWhat is going on here, and why is it correct ? marmoute: What is going on here, and why is it correct ? | |||||
loader = HgBundle20Loader(url=repo_url, visit_date="2016-05-03 15:16:32+00",) | loader = HgLoaderFromDisk(url=repo_url, visit_date="2016-05-03 15:16:32+00") | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == {"status": "eventful"} | assert actual_load_status == {"status": "eventful"} | ||||
# then | # then | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats == { | assert stats == { | ||||
"content": 3, | "content": 3, | ||||
Show All 30 Lines | def test_loader_hg_new_visit_with_release(swh_config, datadir, tmp_path): | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
repo_url, | repo_url, | ||||
type=RevisionType.MERCURIAL.value, | type=RevisionType.MERCURIAL.value, | ||||
status="full", | status="full", | ||||
snapshot=expected_snapshot.id, | snapshot=expected_snapshot.id, | ||||
) | ) | ||||
# Ensure archive loader yields the same snapshot | |||||
loader2 = HgArchiveBundle20Loader( | |||||
url=archive_path, | |||||
archive_path=archive_path, | |||||
visit_date="2016-05-03 15:16:32+00", | |||||
) | |||||
actual_load_status = loader2.load() | |||||
assert actual_load_status == {"status": "eventful"} | |||||
stats2 = get_stats(loader2.storage) | |||||
expected_stats = copy.deepcopy(stats) | |||||
expected_stats["origin"] += 1 | |||||
expected_stats["origin_visit"] += 1 | |||||
assert stats2 == expected_stats | |||||
# That visit yields the same snapshot | |||||
assert_last_visit_matches( | |||||
loader2.storage, | |||||
archive_path, | |||||
status="full", | |||||
type="hg", | |||||
snapshot=expected_snapshot.id, | |||||
) | |||||
def test_visit_with_archive_decompression_failure(swh_config, mocker, datadir):
    """Failure to decompress the archive should fail early: no data is ingested
    and the visit is recorded as partial."""
    # Make every patoolib call blow up so the tarball can never be extracted.
    patoolib_mock = mocker.patch("swh.loader.mercurial.archive_extract.patoolib")
    patoolib_mock.side_effect = ValueError

    tarball = os.path.join(datadir, "hello.tgz")

    loader = HgArchiveBundle20Loader(
        url=tarball, visit_date="2016-05-03 15:16:32+00",
    )
    assert loader.load() == {"status": "failed"}

    # Nothing must have been ingested beyond the origin/visit bookkeeping...
    assert get_stats(loader.storage) == {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 0,
    }
    # ...and the visit ends up partial, with no snapshot attached.
    assert_last_visit_matches(
        loader.storage, tarball, status="partial", type="hg", snapshot=None
    )
# This test has been adapted from the historical `HgBundle20Loader` tests
# to ensure compatibility of `HgLoaderFromDisk`.
# Hashes have been produced by copy-pasting the result of the implementation
# to prevent regressions.
def test_visit_repository_with_transplant_operations(swh_config, datadir, tmp_path): | def test_visit_repository_with_transplant_operations(swh_config, datadir, tmp_path): | ||||
"""Visit a mercurial repository visit transplant operations within should yield a | """Visit a mercurial repository visit transplant operations within should yield a | ||||
snapshot as well. | snapshot as well. | ||||
""" | """ | ||||
archive_name = "transplant" | archive_name = "transplant" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = HgBundle20Loader(url=repo_url, visit_date="2019-05-23 12:06:00+00",) | |||||
loader = HgLoaderFromDisk(url=repo_url, visit_date="2016-05-03 15:16:32+00") | |||||
# load hg repository | # load hg repository | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == {"status": "eventful"} | assert actual_load_status == {"status": "eventful"} | ||||
# collect swh revisions | # collect swh revisions | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full" | loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full" | ||||
Show All 14 Lines | for rev in loader.storage.revision_log(revisions): | ||||
for k, v in rev["extra_headers"]: | for k, v in rev["extra_headers"]: | ||||
if k == b"transplant_source": | if k == b"transplant_source": | ||||
transplant_sources.add(v.decode("ascii")) | transplant_sources.add(v.decode("ascii")) | ||||
# check extracted data are valid | # check extracted data are valid | ||||
assert len(hg_changesets) > 0 | assert len(hg_changesets) > 0 | ||||
assert len(transplant_sources) > 0 | assert len(transplant_sources) > 0 | ||||
assert transplant_sources.issubset(hg_changesets) | assert transplant_sources.issubset(hg_changesets) | ||||
def test_clone_with_timeout_timeout(caplog, tmp_path, monkeypatch):
    """A clone that hangs past the timeout raises CloneTimeoutError and the
    event is logged as a warning mentioning the origin URL."""
    logger = logging.getLogger("test_clone_with_timeout")

    def hanging_clone(source, dest):
        # Never finishes within the 1 second budget given below.
        time.sleep(60)

    monkeypatch.setattr(hglib, "clone", hanging_clone)

    with pytest.raises(CloneTimeoutError):
        HgBundle20Loader.clone_with_timeout(
            logger, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1
        )

    # Every captured record is the timeout warning for that origin.
    for record in caplog.records:
        assert record.levelname == "WARNING"
        assert "https://www.mercurial-scm.org/repo/hello" in record.getMessage()
        assert record.args == ("https://www.mercurial-scm.org/repo/hello", 1)
def test_clone_with_timeout_returns(caplog, tmp_path, monkeypatch):
    """When the clone completes within the timeout, clone_with_timeout passes
    its return value through unchanged."""
    logger = logging.getLogger("test_clone_with_timeout")

    def fake_clone(source, dest):
        return (source, dest)

    monkeypatch.setattr(hglib, "clone", fake_clone)

    origin = "https://www.mercurial-scm.org/repo/hello"
    result = HgBundle20Loader.clone_with_timeout(logger, origin, tmp_path, 1)
    assert result == (origin, tmp_path)
def test_clone_with_timeout_exception(caplog, tmp_path, monkeypatch):
    """An exception raised inside the clone itself propagates to the caller
    instead of being swallowed by the timeout machinery."""
    logger = logging.getLogger("test_clone_with_timeout")

    def failing_clone(source, dest):
        raise ValueError("Test exception")

    monkeypatch.setattr(hglib, "clone", failing_clone)

    with pytest.raises(ValueError) as excinfo:
        HgBundle20Loader.clone_with_timeout(
            logger, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1
        )
    assert "Test exception" in excinfo.value.args[0]
This needs a module docstring.