diff --git a/swh/loader/mercurial/tests/data/example.json b/swh/loader/mercurial/tests/data/example.json
new file mode 100644
index 0000000..67ba3fa
--- /dev/null
+++ b/swh/loader/mercurial/tests/data/example.json
@@ -0,0 +1 @@
+{"directories": ["048960a9eff9a9f22ce2fc2e2bc9b5f73cdfc26a", "09a1bb68db049b4e37540e52ebde76f59126b3a8", "0dad640e1eb9f31cb9d874158318f1f180be9b3a", "181a22e7ad8bbad9bb5846f51c377a7597a0c914", "218ccb1594f7026492c72309974b44aba353d7dc", "93e88b135dc8c3420cd4984e21d8d1eb2781ddce", "d476a11ddfcfce07236a0a03f78e3c1a73bc20ae", "ecf37a29314efe473b399b700c7e5eacc063ba6e", "fa5e6af79e30fc26ab4acbd96388fde22b4c2f36"], "revisions": ["1171aa960a675f8b8327199ff084b6e7c879361d", "23459c9c498542cde67d8d130bc4c0b3084edf5f", "486d227d252ee8a3a01ef40348964f68e21018a5", "65de9d553502aa1f1cb20df179a6ea04e6d2039e", "8f392d7f64419bf4672a75a07f61ce243a7f2c67", "acd77cd84bfca51b3d4f928109a9de52a45618f2", "bb1befca13ceb1a8ebde25cec05966be3eed9bca", "bf91ae31bdb938c2927e741b53af815380340ea7", "c88ea3f5892a5e726739a10eca3afe5d7fa648ce"], "releases": [], "snapshot": "831e126d30afd81b62030547778225577fe5d0d6"}
\ No newline at end of file
diff --git a/swh/loader/mercurial/tests/data/example.sh b/swh/loader/mercurial/tests/data/example.sh
new file mode 100644
index 0000000..4019e4f
--- /dev/null
+++ b/swh/loader/mercurial/tests/data/example.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+# Builds the example Mercurial repository described step by step below.
+# Usage: example.sh [REPO_PATH]   (REPO_PATH falls back to $HG_REPO)
+set -euo pipefail
+
+# Repository path: $1 when given, else $HG_REPO from the environment.
+# "${1:-}" is needed because `set -u` aborts on a bare "$1" with no argument.
+if [ -n "${1:-}" ]; then
+    HG_REPO="$1"
+fi
+: "${HG_REPO:?usage: example.sh REPO_PATH (or set HG_REPO)}"
+
+# prepare repository
+hg init "$HG_REPO"
+cd "$HG_REPO"
+# hgrc lines are written at column 0: Mercurial rejects leading whitespace here
+cat > .hg/hgrc << EOL
+[ui]
+username = Full Name
+EOL
+
+# simple file
+echo "# My Project" >> README.md
+hg add README.md
+hg commit -m "Add README"
+
+# file with modification
+echo "Short project description." >> README.md
+hg commit -m "Add project description"
+
+# file in directory
+mkdir -p myproject
+echo '__version__ = "0.0.1"' >> myproject/__init__.py
+hg add myproject/__init__.py
+hg commit -m "Create python package"
+
+# public changesets
+hg phase --public -r ::.
+
+# closed branch
+hg branch v0.0.2
+echo '__version__ = "0.0.2"' > myproject/__init__.py
+hg commit -m "Bump version to 0.0.2"
+hg update default
+echo "# This is the CLI module" >> myproject/cli.py
+hg add myproject/cli.py
+hg commit -m "Create myproject.cli module"
+hg update v0.0.2
+hg merge -r default
+hg commit --close-branch -m "Close branch v0.0.2"
+hg update default
+
+# living branch
+hg branch v0.1.x
+echo '__version__ = "0.1.0"' > myproject/__init__.py
+hg commit -m "Bump version to 0.1.0"
+hg update default
+echo "# This is the utils module" >> myproject/utils.py
+hg add myproject/utils.py
+hg commit -m "Create myproject.utils module"
+hg update v0.1.x
+hg merge -r default
+hg commit -m "Merge default"
+hg update default
diff --git a/swh/loader/mercurial/tests/data/example.tgz b/swh/loader/mercurial/tests/data/example.tgz
new file mode 100644
index 0000000..0790608
Binary files /dev/null and b/swh/loader/mercurial/tests/data/example.tgz differ
diff --git a/swh/loader/mercurial/tests/test_loader.py b/swh/loader/mercurial/tests/test_loader.py
index 9ca6e7c..c84d27e 100644
--- a/swh/loader/mercurial/tests/test_loader.py
+++ b/swh/loader/mercurial/tests/test_loader.py
@@ -1,310 +1,310 @@
 # Copyright (C) 2018-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import copy
 import logging
 import os
 import time

 import hglib
 import pytest

 from swh.loader.tests import (
     assert_last_visit_matches,
     check_snapshot,
     get_stats,
     prepare_repository_from_archive,
 )
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model
import RevisionType, Snapshot, SnapshotBranch, TargetType
 from swh.storage.algos.snapshot import snapshot_get_latest

 from ..loader import CloneTimeoutError, HgArchiveBundle20Loader, HgBundle20Loader
 from .loader_checker import ExpectedSwhids, LoaderChecker


 def test_examples(swh_config, datadir, tmp_path):
+    """Run LoaderChecker on each example archive against its expected-ids JSON file."""
-    for archive_name in ("hello", "transplant", "the-sandbox"):
+    for archive_name in ("hello", "transplant", "the-sandbox", "example"):
         archive_path = os.path.join(datadir, f"{archive_name}.tgz")
         json_path = os.path.join(datadir, f"{archive_name}.json")
         repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

         LoaderChecker(
             loader=HgBundle20Loader(repo_url), expected=ExpectedSwhids.load(json_path),
         ).check()


 def test_loader_hg_new_visit_no_release(swh_config, datadir, tmp_path):
     """Eventful visit should yield 1 snapshot"""
     archive_name = "the-sandbox"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

     loader = HgBundle20Loader(repo_url)

     assert loader.load() == {"status": "eventful"}

     tip_revision_develop = "a9c4534552df370f43f0ef97146f393ef2f2a08c"
     tip_revision_default = "70e750bb046101fdced06f428e73fee471509c56"
     expected_snapshot = Snapshot(
         id=hash_to_bytes("3b8fe58e467deb7597b12a5fd3b2c096b8c02028"),
         branches={
             b"develop": SnapshotBranch(
                 target=hash_to_bytes(tip_revision_develop),
                 target_type=TargetType.REVISION,
             ),
             b"default": SnapshotBranch(
                 target=hash_to_bytes(tip_revision_default),
                 target_type=TargetType.REVISION,
             ),
             b"HEAD": SnapshotBranch(target=b"develop", target_type=TargetType.ALIAS,),
         },
     )

     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="hg",
         snapshot=expected_snapshot.id,
     )
     check_snapshot(expected_snapshot, loader.storage)

     stats = get_stats(loader.storage)
     assert stats == {
         "content": 2,
         "directory": 3,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 58,
         "skipped_content": 0,
         "snapshot": 1,
     }

     # Ensure archive loader yields the same snapshot
     loader2 = HgArchiveBundle20Loader(
         url=archive_path,
         archive_path=archive_path,
         visit_date="2016-05-03 15:16:32+00",
     )

     actual_load_status = loader2.load()
     assert actual_load_status == {"status": "eventful"}

     stats2 = get_stats(loader2.storage)
+    # loader2 is given archive_path as its url: one more origin and visit expected
     expected_stats = copy.deepcopy(stats)
     expected_stats["origin"] += 1
     expected_stats["origin_visit"] += 1
     assert stats2 == expected_stats

     # That visit yields the same snapshot
     assert_last_visit_matches(
         loader2.storage,
         archive_path,
         status="full",
         type="hg",
         snapshot=expected_snapshot.id,
     )


 def test_loader_hg_new_visit_with_release(swh_config, datadir, tmp_path):
     """Eventful visit with release should yield 1 snapshot"""
     archive_name = "hello"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

     loader = HgBundle20Loader(url=repo_url, visit_date="2016-05-03 15:16:32+00",)

     actual_load_status = loader.load()
     assert actual_load_status == {"status": "eventful"}

     # then
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 3,
         "directory": 3,
         "origin": 1,
         "origin_visit": 1,
         "release": 1,
         "revision": 3,
         "skipped_content": 0,
         "snapshot": 1,
     }

-    # cf. test_loader.org for explaining from where those hashes
+    # cf. test_loader.org for an explanation of where those hashes come from
     tip_release = hash_to_bytes("515c4d72e089404356d0f4b39d60f948b8999140")
     release = loader.storage.release_get([tip_release])[0]
     assert release is not None

     tip_revision_default = hash_to_bytes("c3dbe4fbeaaa98dd961834e4007edb3efb0e2a27")
     revision = loader.storage.revision_get([tip_revision_default])[0]
     assert revision is not None

     expected_snapshot = Snapshot(
         id=hash_to_bytes("d35668e02e2ba4321dc951cd308cf883786f918a"),
         branches={
             b"default": SnapshotBranch(
                 target=tip_revision_default, target_type=TargetType.REVISION,
             ),
             b"0.1": SnapshotBranch(target=tip_release, target_type=TargetType.RELEASE,),
             b"HEAD": SnapshotBranch(target=b"default", target_type=TargetType.ALIAS,),
         },
     )

     check_snapshot(expected_snapshot, loader.storage)
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         type=RevisionType.MERCURIAL.value,
         status="full",
         snapshot=expected_snapshot.id,
     )

     # Ensure archive loader yields the same snapshot
     loader2 = HgArchiveBundle20Loader(
         url=archive_path,
         archive_path=archive_path,
         visit_date="2016-05-03 15:16:32+00",
     )

     actual_load_status = loader2.load()
     assert actual_load_status == {"status": "eventful"}

     stats2 = get_stats(loader2.storage)
+    # loader2 is given archive_path as its url: one more origin and visit expected
     expected_stats = copy.deepcopy(stats)
     expected_stats["origin"] += 1
     expected_stats["origin_visit"] += 1
     assert stats2 == expected_stats

     # That visit yields the same snapshot
     assert_last_visit_matches(
         loader2.storage,
         archive_path,
         status="full",
         type="hg",
         snapshot=expected_snapshot.id,
     )


 def test_visit_with_archive_decompression_failure(swh_config, mocker, datadir):
     """Failure to decompress should fail early, no data is ingested"""
     mock_patoo = mocker.patch("swh.loader.mercurial.archive_extract.patoolib")
+    # NOTE(review): side_effect fires only when the patched object itself is called;
+    # if patoolib is used as a module (patoolib.<function>(...)), confirm that this
+    # is really what makes the load fail.
     mock_patoo.side_effect = ValueError

     archive_name = "hello"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
-    # Ensure archive loader yields the same snapshot
+    # Load the archive while patoolib is patched (see above)
     loader = HgArchiveBundle20Loader(
         url=archive_path, visit_date="2016-05-03 15:16:32+00",
     )

     actual_load_status = loader.load()
     assert actual_load_status == {"status": "failed"}

     stats = get_stats(loader.storage)
     assert stats == {
         "content": 0,
         "directory": 0,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 0,
         "skipped_content": 0,
         "snapshot": 0,
     }
-    # That visit yields the same snapshot
+    # The visit is recorded as partial, without any snapshot
     assert_last_visit_matches(
         loader.storage, archive_path, status="partial", type="hg", snapshot=None
     )


 def test_visit_repository_with_transplant_operations(swh_config, datadir, tmp_path):
-    """Visit a mercurial repository visit transplant operations within should yield a
+    """Visit a mercurial repository with transplant operations within should yield a
     snapshot as well.

     """
     archive_name = "transplant"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
     loader = HgBundle20Loader(url=repo_url, visit_date="2019-05-23 12:06:00+00",)

     # load hg repository
     actual_load_status = loader.load()
     assert actual_load_status == {"status": "eventful"}

     # collect swh revisions
     assert_last_visit_matches(
         loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full"
     )

     revisions = []
     snapshot = snapshot_get_latest(loader.storage, repo_url)
     for branch in snapshot.branches.values():
         if branch.target_type.value != "revision":
             continue
         revisions.append(branch.target)

     # extract original changesets info and the transplant sources
     hg_changesets = set()
     transplant_sources = set()
     for rev in loader.storage.revision_log(revisions):
         hg_changesets.add(rev["metadata"]["node"])
         for k, v in rev["extra_headers"]:
             if k == b"transplant_source":
                 transplant_sources.add(v.decode("ascii"))

     # check extracted data are valid
     assert len(hg_changesets) > 0
     assert len(transplant_sources) > 0
     assert transplant_sources.issubset(hg_changesets)


 def test_clone_with_timeout_timeout(caplog, tmp_path, monkeypatch):
+    """A clone stub sleeping 60s must make clone_with_timeout raise CloneTimeoutError."""
     log = logging.getLogger("test_clone_with_timeout")

     def clone_timeout(source, dest):
         time.sleep(60)

     monkeypatch.setattr(hglib, "clone", clone_timeout)

     with pytest.raises(CloneTimeoutError):
         HgBundle20Loader.clone_with_timeout(
             log, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1
         )

+    # NOTE(review): this loop asserts nothing when caplog.records is empty.
     for record in caplog.records:
         assert record.levelname == "WARNING"
         assert "https://www.mercurial-scm.org/repo/hello" in record.getMessage()
         assert record.args == ("https://www.mercurial-scm.org/repo/hello", 1)


 def test_clone_with_timeout_returns(caplog, tmp_path, monkeypatch):
+    """clone_with_timeout returns what the stubbed hglib.clone returns."""
     log = logging.getLogger("test_clone_with_timeout")

     def clone_return(source, dest):
         return (source, dest)

     monkeypatch.setattr(hglib, "clone", clone_return)

     assert HgBundle20Loader.clone_with_timeout(
         log, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1
     ) == ("https://www.mercurial-scm.org/repo/hello", tmp_path)


 def test_clone_with_timeout_exception(caplog, tmp_path, monkeypatch):
+    """A ValueError raised by the stubbed hglib.clone reaches the caller."""
     log = logging.getLogger("test_clone_with_timeout")

     def clone_return(source, dest):
         raise ValueError("Test exception")

     monkeypatch.setattr(hglib, "clone", clone_return)

     with pytest.raises(ValueError) as excinfo:
         HgBundle20Loader.clone_with_timeout(
             log, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1
         )
     assert "Test exception" in excinfo.value.args[0]