Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/tests/test_loader.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | |||||
import logging | import logging | ||||
import os | import os | ||||
import time | import time | ||||
from typing import Any, Dict | |||||
from unittest.mock import patch | |||||
import hglib | import hglib | ||||
import pytest | import pytest | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.model import RevisionType | from swh.model.model import RevisionType | ||||
from swh.loader.core.tests import BaseLoaderTest | |||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
assert_last_visit_matches, | assert_last_visit_matches, | ||||
check_snapshot, | check_snapshot, | ||||
get_stats, | get_stats, | ||||
prepare_repository_from_archive, | prepare_repository_from_archive, | ||||
) | ) | ||||
from .common import HgLoaderMemoryStorage, HgArchiveLoaderMemoryStorage | from ..loader import HgBundle20Loader, HgArchiveBundle20Loader, CloneTimeoutError | ||||
from ..loader import HgBundle20Loader, CloneTimeoutError | |||||
class BaseHgLoaderTest(BaseLoaderTest): | |||||
"""Mixin base loader test to prepare the mercurial | |||||
repository to uncompress, load and test the results. | |||||
""" | |||||
def setUp( | |||||
self, | |||||
archive_name="the-sandbox.tgz", | |||||
filename="the-sandbox", | |||||
uncompress_archive=True, | |||||
): | |||||
super().setUp( | |||||
archive_name=archive_name, | |||||
filename=filename, | |||||
prefix_tmp_folder_name="swh.loader.mercurial.", | |||||
start_path=os.path.dirname(__file__), | |||||
uncompress_archive=uncompress_archive, | |||||
) | |||||
def test_loader_hg_new_visit(swh_config, datadir, tmp_path): | def test_loader_hg_new_visit_no_release(swh_config, datadir, tmp_path): | ||||
"""Eventful visit should yield 1 snapshot""" | """Eventful visit should yield 1 snapshot""" | ||||
archive_name = "the-sandbox" | archive_name = "the-sandbox" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = HgBundle20Loader(repo_url) | loader = HgBundle20Loader(repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
Show All 27 Lines | expected_snapshot = { | ||||
"develop": {"target": tip_revision_develop, "target_type": "revision"}, | "develop": {"target": tip_revision_develop, "target_type": "revision"}, | ||||
"default": {"target": tip_revision_default, "target_type": "revision"}, | "default": {"target": tip_revision_default, "target_type": "revision"}, | ||||
"HEAD": {"target": "develop", "target_type": "alias",}, | "HEAD": {"target": "develop", "target_type": "alias",}, | ||||
}, | }, | ||||
} | } | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
# Ensure archive loader yields the same snapshot | |||||
loader2 = HgArchiveBundle20Loader( | |||||
url=archive_path, | |||||
archive_path=archive_path, | |||||
visit_date="2016-05-03 15:16:32+00", | |||||
) | |||||
actual_load_status = loader2.load() | |||||
assert actual_load_status == {"status": "eventful"} | |||||
stats2 = get_stats(loader2.storage) | |||||
expected_stats = copy.deepcopy(stats) | |||||
expected_stats["origin"] += 1 | |||||
expected_stats["origin_visit"] += 1 | |||||
assert stats2 == expected_stats | |||||
# That visit yields the same snapshot | |||||
assert_last_visit_matches( | |||||
loader2.storage, | |||||
archive_path, | |||||
status="full", | |||||
type="hg", | |||||
snapshot=hashutil.hash_to_bytes("3b8fe58e467deb7597b12a5fd3b2c096b8c02028"), | |||||
) | |||||
def test_loader_hg_new_visit_with_release(swh_config, datadir, tmp_path): | |||||
"""Eventful visit with release should yield 1 snapshot""" | |||||
archive_name = "hello" | |||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | |||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | |||||
loader = HgBundle20Loader(url=repo_url, visit_date="2016-05-03 15:16:32+00",) | |||||
actual_load_status = loader.load() | |||||
assert actual_load_status == {"status": "eventful"} | |||||
class CommonHgLoaderData: | |||||
def assert_data_ok(self, actual_load_status: Dict[str, Any]): | |||||
# then | # then | ||||
self.assertCountContents(3) # type: ignore | stats = get_stats(loader.storage) | ||||
self.assertCountDirectories(3) # type: ignore | assert stats == { | ||||
self.assertCountReleases(1) # type: ignore | "content": 3, | ||||
self.assertCountRevisions(3) # type: ignore | "directory": 3, | ||||
"origin": 1, | |||||
"origin_visit": 1, | |||||
"person": 3, | |||||
"release": 1, | |||||
"revision": 3, | |||||
"skipped_content": 0, | |||||
"snapshot": 1, | |||||
} | |||||
# cf. test_loader.org for explaining from where those hashes | |||||
tip_release = "515c4d72e089404356d0f4b39d60f948b8999140" | tip_release = "515c4d72e089404356d0f4b39d60f948b8999140" | ||||
self.assertReleasesContain([tip_release]) # type: ignore | release = loader.storage.release_get([hashutil.hash_to_bytes(tip_release)]) | ||||
assert release is not None | |||||
tip_revision_default = "c3dbe4fbeaaa98dd961834e4007edb3efb0e2a27" | tip_revision_default = "c3dbe4fbeaaa98dd961834e4007edb3efb0e2a27" | ||||
# cf. test_loader.org for explaining from where those hashes | revision = loader.storage.revision_get( | ||||
# come from | [hashutil.hash_to_bytes(tip_revision_default)] | ||||
expected_revisions = { | ) | ||||
# revision hash | directory hash # noqa | assert revision is not None | ||||
"93b48d515580522a05f389bec93227fc8e43d940": "43d727f2f3f2f7cb3b098ddad1d7038464a4cee2", # noqa | |||||
"8dd3db5d5519e4947f035d141581d304565372d2": "b3f85f210ff86d334575f64cb01c5bf49895b63e", # noqa | |||||
tip_revision_default: "8f2be433c945384c85920a8e60f2a68d2c0f20fb", | |||||
} | |||||
self.assertRevisionsContain(expected_revisions) # type: ignore | |||||
self.assertCountSnapshots(1) # type: ignore | |||||
expected_snapshot_id = "d35668e02e2ba4321dc951cd308cf883786f918a" | |||||
expected_snapshot = { | expected_snapshot = { | ||||
"id": "d35668e02e2ba4321dc951cd308cf883786f918a", | "id": expected_snapshot_id, | ||||
"branches": { | "branches": { | ||||
"default": {"target": tip_revision_default, "target_type": "revision"}, | "default": {"target": tip_revision_default, "target_type": "revision"}, | ||||
"0.1": {"target": tip_release, "target_type": "release"}, | "0.1": {"target": tip_release, "target_type": "release"}, | ||||
"HEAD": {"target": "default", "target_type": "alias",}, | "HEAD": {"target": "default", "target_type": "alias",}, | ||||
}, | }, | ||||
} | } | ||||
self.assertSnapshotEqual(expected_snapshot) # type: ignore | check_snapshot(expected_snapshot, loader.storage) | ||||
assert actual_load_status == {"status": "eventful"} | |||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
self.storage, # type: ignore | loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full", | ||||
self.repo_url, # type: ignore | |||||
type=RevisionType.MERCURIAL.value, | |||||
status="full", | |||||
) | ) | ||||
# Ensure archive loader yields the same snapshot | |||||
class WithReleaseLoaderTest(BaseHgLoaderTest, CommonHgLoaderData): | loader2 = HgArchiveBundle20Loader( | ||||
"""Load a mercurial repository with release | url=archive_path, | ||||
archive_path=archive_path, | |||||
""" | |||||
def setUp(self): | |||||
super().setUp(archive_name="hello.tgz", filename="hello") | |||||
self.loader = HgLoaderMemoryStorage( | |||||
url=self.repo_url, | |||||
visit_date="2016-05-03 15:16:32+00", | visit_date="2016-05-03 15:16:32+00", | ||||
directory=self.destination_path, | |||||
) | ) | ||||
self.storage = self.loader.storage | |||||
def test_load(self): | |||||
"""Load a repository with tags results in 1 snapshot | |||||
""" | actual_load_status = loader2.load() | ||||
# when | assert actual_load_status == {"status": "eventful"} | ||||
actual_load_status = self.loader.load() | |||||
self.assert_data_ok(actual_load_status) | |||||
class ArchiveLoaderTest(BaseHgLoaderTest, CommonHgLoaderData): | |||||
"""Load a mercurial repository archive with release | |||||
""" | stats2 = get_stats(loader2.storage) | ||||
expected_stats = copy.deepcopy(stats) | |||||
expected_stats["origin"] += 1 | |||||
expected_stats["origin_visit"] += 1 | |||||
assert stats2 == expected_stats | |||||
def setUp(self): | # That visit yields the same snapshot | ||||
super().setUp( | assert_last_visit_matches( | ||||
archive_name="hello.tgz", filename="hello", uncompress_archive=False | loader2.storage, | ||||
) | archive_path, | ||||
self.loader = HgArchiveLoaderMemoryStorage( | status="full", | ||||
url=self.repo_url, | type="hg", | ||||
visit_date="2016-05-03 15:16:32+00", | snapshot=hashutil.hash_to_bytes(expected_snapshot_id), | ||||
archive_path=self.destination_path, | |||||
) | ) | ||||
self.storage = self.loader.storage | |||||
def test_load(self): | |||||
"""Load a mercurial repository archive with tags results in 1 snapshot | |||||
""" | |||||
# when | |||||
actual_load_status = self.loader.load() | |||||
self.assert_data_ok(actual_load_status) | |||||
@patch("swh.loader.mercurial.archive_extract.patoolib") | def test_visit_with_archive_decompression_failure(swh_config, mocker, datadir): | ||||
def test_load_with_failure(self, mock_patoo): | """Failure to decompress should fail early, no data is ingested""" | ||||
mock_patoo = mocker.patch("swh.loader.mercurial.archive_extract.patoolib") | |||||
mock_patoo.side_effect = ValueError | mock_patoo.side_effect = ValueError | ||||
# when | archive_name = "hello" | ||||
r = self.loader.load() | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
# Ensure archive loader yields the same snapshot | |||||
loader = HgArchiveBundle20Loader( | |||||
url=archive_path, visit_date="2016-05-03 15:16:32+00", | |||||
) | |||||
self.assertEqual(r, {"status": "failed"}) | actual_load_status = loader.load() | ||||
self.assertCountContents(0) | assert actual_load_status == {"status": "failed"} | ||||
self.assertCountDirectories(0) | |||||
self.assertCountRevisions(0) | stats = get_stats(loader.storage) | ||||
self.assertCountReleases(0) | assert stats == { | ||||
self.assertCountSnapshots(0) | "content": 0, | ||||
"directory": 0, | |||||
"origin": 1, | |||||
"origin_visit": 1, | |||||
"person": 0, | |||||
"release": 0, | |||||
"revision": 0, | |||||
"skipped_content": 0, | |||||
"snapshot": 0, | |||||
} | |||||
# That visit yields the same snapshot | |||||
assert_last_visit_matches( | |||||
loader.storage, archive_path, status="partial", type="hg", snapshot=None | |||||
) | |||||
class WithTransplantLoaderTest(BaseHgLoaderTest): | def test_visit_repository_with_transplant_operations(swh_config, datadir, tmp_path): | ||||
"""Load a mercurial repository where transplant operations | """Visit a mercurial repository visit transplant operations within should yield a | ||||
have been used. | snapshot as well. | ||||
ardumont: I don't recall exactly what the bug was about, but that's the scenario to ensure we…
Not Done Inline Actions: Test context: T1729 anlambert: Test context: T1729
Done Inline Actions: Thank you, sir ;) ardumont: Thank you, sir ;)
""" | """ | ||||
def setUp(self): | archive_name = "transplant" | ||||
super().setUp(archive_name="transplant.tgz", filename="transplant") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
self.loader = HgLoaderMemoryStorage( | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
url=self.repo_url, | loader = HgBundle20Loader(url=repo_url, visit_date="2019-05-23 12:06:00+00",) | ||||
visit_date="2019-05-23 12:06:00+00", | |||||
directory=self.destination_path, | |||||
) | |||||
self.storage = self.loader.storage | |||||
def test_load(self): | |||||
# load hg repository | # load hg repository | ||||
actual_load_status = self.loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == {"status": "eventful"} | assert actual_load_status == {"status": "eventful"} | ||||
# collect swh revisions | # collect swh revisions | ||||
origin_url = self.storage.origin_get([{"type": "hg", "url": self.repo_url}])[0][ | |||||
"url" | |||||
] | |||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
self.storage, origin_url, type=RevisionType.MERCURIAL.value, status="full" | loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full" | ||||
) | ) | ||||
revisions = [] | revisions = [] | ||||
snapshot = snapshot_get_latest(self.storage, origin_url) | snapshot = snapshot_get_latest(loader.storage, repo_url) | ||||
for branch in snapshot.branches.values(): | for branch in snapshot.branches.values(): | ||||
if branch.target_type.value != "revision": | if branch.target_type.value != "revision": | ||||
continue | continue | ||||
revisions.append(branch.target) | revisions.append(branch.target) | ||||
# extract original changesets info and the transplant sources | # extract original changesets info and the transplant sources | ||||
hg_changesets = set() | hg_changesets = set() | ||||
transplant_sources = set() | transplant_sources = set() | ||||
for rev in self.storage.revision_log(revisions): | for rev in loader.storage.revision_log(revisions): | ||||
hg_changesets.add(rev["metadata"]["node"]) | hg_changesets.add(rev["metadata"]["node"]) | ||||
for k, v in rev["metadata"]["extra_headers"]: | for k, v in rev["metadata"]["extra_headers"]: | ||||
if k == "transplant_source": | if k == "transplant_source": | ||||
transplant_sources.add(v.decode("ascii")) | transplant_sources.add(v.decode("ascii")) | ||||
# check extracted data are valid | # check extracted data are valid | ||||
self.assertTrue(len(hg_changesets) > 0) | assert len(hg_changesets) > 0 | ||||
self.assertTrue(len(transplant_sources) > 0) | assert len(transplant_sources) > 0 | ||||
self.assertTrue(transplant_sources.issubset(hg_changesets)) | assert transplant_sources.issubset(hg_changesets) | ||||
def test_clone_with_timeout_timeout(caplog, tmp_path, monkeypatch): | def test_clone_with_timeout_timeout(caplog, tmp_path, monkeypatch): | ||||
log = logging.getLogger("test_clone_with_timeout") | log = logging.getLogger("test_clone_with_timeout") | ||||
def clone_timeout(source, dest): | def clone_timeout(source, dest): | ||||
time.sleep(60) | time.sleep(60) | ||||
Show All 39 Lines |
I don't recall exactly what the bug was about, but this is the scenario that ensures we don't fail on it again [1]
[1] https://www.mercurial-scm.org/wiki/TransplantExtension