Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/tests/test_from_disk.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
from datetime import datetime | |||||
from hashlib import sha1 | |||||
from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
assert_last_visit_matches, | assert_last_visit_matches, | ||||
check_snapshot, | check_snapshot, | ||||
get_stats, | get_stats, | ||||
prepare_repository_from_archive, | prepare_repository_from_archive, | ||||
) | ) | ||||
from swh.model.from_disk import Content | from swh.model.from_disk import Content, DentryPerms | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType | from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType | ||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from ..from_disk import HgDirectory, HgLoaderFromDisk | from ..from_disk import HgDirectory, HgLoaderFromDisk | ||||
from .loader_checker import ExpectedSwhids, LoaderChecker | from .loader_checker import ExpectedSwhids, LoaderChecker | ||||
def random_content() -> Content:
    """Build a minimal ``Content`` whose identifier is derived from the current time.

    The ``sha1_git`` value is the SHA-1 digest of the stringified
    ``datetime.now()`` timestamp; the permissions are ``DentryPerms.content``.
    """
    timestamp = datetime.now()
    digest = sha1(str(timestamp).encode()).digest()
    return Content({"sha1_git": digest, "perms": DentryPerms.content})
def test_hg_directory_creates_missing_directories():
    """Assigning to a nested path must create every missing parent directory."""
    directory = HgDirectory()
    content = random_content()

    directory[b"path/to/some/content"] = content

    # The assignment not raising is not enough: verify what was actually built.
    for parent in (b"path", b"path/to", b"path/to/some"):
        assert directory.get(parent) is not None
    assert directory.get(b"path/to/some/content") == content
def test_hg_directory_get():
    """``get`` yields the default for a missing path and the entry once stored."""
    path = b"path/to/content"
    entry = random_content()
    tree = HgDirectory()

    # Nothing stored yet: the implicit default is None, an explicit one is
    # handed back as-is.
    assert tree.get(path) is None
    assert tree.get(path, entry) == entry

    # Once stored, the entry is returned without needing a default.
    tree[path] = entry
    assert tree.get(path) == entry
def test_hg_directory_deletes_empty_directories():
    """Deleting an entry prunes emptied parents but keeps non-empty ones."""
    tree = HgDirectory()
    kept = random_content()
    kept_path = b"path/to/content"
    removed_path = b"path/to/some/deep/content"

    tree[kept_path] = kept
    tree[removed_path] = random_content()
    del tree[removed_path]

    # Both directories that only led to the removed entry must be gone.
    for pruned in (b"path/to/some/deep", b"path/to/some"):
        assert tree.get(pruned) is None
    # The sibling entry under ``path/to`` must survive the pruning.
    assert tree.get(kept_path) == kept
def test_hg_directory_when_directory_replaces_file():
    """A file entry gives way to a directory when a path is stored beneath it."""
    directory = HgDirectory()

    # First store a plain file at ``path/to/some``...
    directory[b"path/to/some"] = random_content()
    # ...then store something below that same path, which requires ``some``
    # to become a directory.
    nested = random_content()
    directory[b"path/to/some/content"] = nested

    # Beyond not raising, the nested entry must be reachable afterwards.
    assert directory.get(b"path/to/some/content") == nested
# Those tests assert expectations on repository loading | # Those tests assert expectations on repository loading | ||||
# by reading expected values from associated json files | # by reading expected values from associated json files | ||||
# produced by the `swh-hg-identify` command line utility. | # produced by the `swh-hg-identify` command line utility. | ||||
# | # | ||||
# It has more granularity than historical tests. | # It has more granularity than historical tests. | ||||
# Assertions will tell if the error comes from the directories | # Assertions will tell if the error comes from the directories | ||||
marmoute: You should add a variant with a mix of empty and non-empty directories, to make sure we do not over-delete. | |||||
# revisions or release rather than only checking the snapshot. | # revisions or release rather than only checking the snapshot. | ||||
Done · Inline Actions · marmoute: The length of that directory will always be 1, even if `some` was not deleted. So you need to check that len('path/some') is 1. | |||||
# | # | ||||
# With more work it should even be possible to know which part | # With more work it should even be possible to know which part | ||||
# of an object is faulty. | # of an object is faulty. | ||||
def test_examples(swh_config, datadir, tmp_path): | def test_examples(swh_config, datadir, tmp_path): | ||||
for archive_name in ("hello", "transplant", "the-sandbox", "example"): | for archive_name in ("hello", "transplant", "the-sandbox", "example"): | ||||
archive_path = os.path.join(datadir, f"{archive_name}.tgz") | archive_path = os.path.join(datadir, f"{archive_name}.tgz") | ||||
json_path = os.path.join(datadir, f"{archive_name}.json") | json_path = os.path.join(datadir, f"{archive_name}.json") | ||||
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) | ||||
▲ Show 20 Lines • Show All 163 Lines • Show Last 20 Lines |
You should add a variant with a mix of empty and non-empty directories, to make sure we do not over-delete.