Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/tests/test_from_disk.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | |||||
from datetime import datetime | from datetime import datetime | ||||
from hashlib import sha1 | from hashlib import sha1 | ||||
import os | |||||
from swh.loader.mercurial.utils import parse_visit_date | |||||
from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
assert_last_visit_matches, | assert_last_visit_matches, | ||||
check_snapshot, | check_snapshot, | ||||
get_stats, | get_stats, | ||||
prepare_repository_from_archive, | prepare_repository_from_archive, | ||||
) | ) | ||||
from swh.model.from_disk import Content, DentryPerms | from swh.model.from_disk import Content, DentryPerms | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType | from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType | ||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from ..from_disk import HgDirectory, HgLoaderFromDisk | from ..from_disk import HgDirectory, HgLoaderFromDisk | ||||
from .loader_checker import ExpectedSwhids, LoaderChecker | from .loader_checker import ExpectedSwhids, LoaderChecker | ||||
VISIT_DATE = parse_visit_date("2016-05-03 15:16:32+00") | |||||
assert VISIT_DATE is not None | |||||
def random_content() -> Content: | def random_content() -> Content: | ||||
"""Create minimal content object.""" | """Create minimal content object.""" | ||||
data = str(datetime.now()).encode() | data = str(datetime.now()).encode() | ||||
return Content({"sha1_git": sha1(data).digest(), "perms": DentryPerms.content}) | return Content({"sha1_git": sha1(data).digest(), "perms": DentryPerms.content}) | ||||
def test_hg_directory_creates_missing_directories(): | def test_hg_directory_creates_missing_directories(): | ||||
Show All 36 Lines | |||||
# produced by the `swh-hg-identify` command line utility. | # produced by the `swh-hg-identify` command line utility. | ||||
# | # | ||||
# It has more granularity than historical tests. | # It has more granularity than historical tests. | ||||
# Assertions will tell if the error comes from the directories, | # Assertions will tell if the error comes from the directories, | ||||
# revisions or releases rather than only checking the snapshot. | # revisions or releases rather than only checking the snapshot. | ||||
# | # | ||||
# With more work it should even be possible to know which part | # With more work it should even be possible to know which part | ||||
# of an object is faulty. | # of an object is faulty. | ||||
def test_examples(swh_config, datadir, tmp_path): | def test_examples(swh_storage, datadir, tmp_path): | ||||
for archive_name in ("hello", "transplant", "the-sandbox", "example"): | for archive_name in ("hello", "transplant", "the-sandbox", "example"): | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
json_path = os.path.join(datadir, f"{archive_name}.json") | json_path = os.path.join(datadir, f"{archive_name}.json") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
LoaderChecker( | LoaderChecker( | ||||
loader=HgLoaderFromDisk(repo_url), expected=ExpectedSwhids.load(json_path), | loader=HgLoaderFromDisk(swh_storage, repo_url), | ||||
expected=ExpectedSwhids.load(json_path), | |||||
).check() | ).check() | ||||
# This test has been adapted from the historical `HgBundle20Loader` tests | # This test has been adapted from the historical `HgBundle20Loader` tests | ||||
# to ensure compatibility of `HgLoaderFromDisk`. | # to ensure compatibility of `HgLoaderFromDisk`. | ||||
# Hashes have been produced by copy-pasting the result of the implementation | # Hashes have been produced by copy-pasting the result of the implementation | ||||
# to prevent regressions. | # to prevent regressions. | ||||
def test_loader_hg_new_visit_no_release(swh_config, datadir, tmp_path): | def test_loader_hg_new_visit_no_release(swh_storage, datadir, tmp_path): | ||||
"""Eventful visit should yield 1 snapshot""" | """Eventful visit should yield 1 snapshot""" | ||||
archive_name = "the-sandbox" | archive_name = "the-sandbox" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = HgLoaderFromDisk(url=repo_url) | loader = HgLoaderFromDisk(swh_storage, url=repo_url) | ||||
assert loader.load() == {"status": "eventful"} | assert loader.load() == {"status": "eventful"} | ||||
tip_revision_develop = "a9c4534552df370f43f0ef97146f393ef2f2a08c" | tip_revision_develop = "a9c4534552df370f43f0ef97146f393ef2f2a08c" | ||||
tip_revision_default = "70e750bb046101fdced06f428e73fee471509c56" | tip_revision_default = "70e750bb046101fdced06f428e73fee471509c56" | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes("3b8fe58e467deb7597b12a5fd3b2c096b8c02028"), | id=hash_to_bytes("3b8fe58e467deb7597b12a5fd3b2c096b8c02028"), | ||||
branches={ | branches={ | ||||
Show All 30 Lines | assert stats == { | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} | } | ||||
# This test has been adapted from the historical `HgBundle20Loader` tests | # This test has been adapted from the historical `HgBundle20Loader` tests | ||||
# to ensure compatibility of `HgLoaderFromDisk`. | # to ensure compatibility of `HgLoaderFromDisk`. | ||||
# Hashes have been produced by copy-pasting the result of the implementation | # Hashes have been produced by copy-pasting the result of the implementation | ||||
# to prevent regressions. | # to prevent regressions. | ||||
def test_loader_hg_new_visit_with_release(swh_config, datadir, tmp_path): | def test_loader_hg_new_visit_with_release(swh_storage, datadir, tmp_path): | ||||
"""Eventful visit with release should yield 1 snapshot""" | """Eventful visit with release should yield 1 snapshot""" | ||||
archive_name = "hello" | archive_name = "hello" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = HgLoaderFromDisk(url=repo_url, visit_date="2016-05-03 15:16:32+00") | loader = HgLoaderFromDisk(swh_storage, url=repo_url, visit_date=VISIT_DATE,) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == {"status": "eventful"} | assert actual_load_status == {"status": "eventful"} | ||||
# then | # then | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert stats == { | assert stats == { | ||||
"content": 3, | "content": 3, | ||||
Show All 35 Lines | assert_last_visit_matches( | ||||
snapshot=expected_snapshot.id, | snapshot=expected_snapshot.id, | ||||
) | ) | ||||
# This test has been adapted from the historical `HgBundle20Loader` tests | # This test has been adapted from the historical `HgBundle20Loader` tests | ||||
# to ensure compatibility of `HgLoaderFromDisk`. | # to ensure compatibility of `HgLoaderFromDisk`. | ||||
# Hashes have been produced by copy-pasting the result of the implementation | # Hashes have been produced by copy-pasting the result of the implementation | ||||
# to prevent regressions. | # to prevent regressions. | ||||
def test_visit_repository_with_transplant_operations(swh_config, datadir, tmp_path): | def test_visit_repository_with_transplant_operations(swh_storage, datadir, tmp_path): | ||||
"""Visiting a mercurial repository with transplant operations should yield a | """Visiting a mercurial repository with transplant operations should yield a | ||||
snapshot as well. | snapshot as well. | ||||
""" | """ | ||||
archive_name = "transplant" | archive_name = "transplant" | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
loader = HgLoaderFromDisk(url=repo_url, visit_date="2016-05-03 15:16:32+00") | loader = HgLoaderFromDisk(swh_storage, url=repo_url, visit_date=VISIT_DATE,) | ||||
# load hg repository | # load hg repository | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == {"status": "eventful"} | assert actual_load_status == {"status": "eventful"} | ||||
# collect swh revisions | # collect swh revisions | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full" | loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full" | ||||
Show All 22 Lines |