# swh/loader/mercurial/tests/test_from_disk.py
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Tests for HgLoaderFromDisk: loading a mercurial repository from disk
should produce the expected contents, directories, revisions and snapshot."""
import json
import os

import pytest

from swh.loader.mercurial.from_disk import HgLoaderFromDisk
from swh.loader.tests import (
    assert_last_visit_matches,
    check_snapshot,
    get_stats,
    prepare_repository_from_archive,
)
from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import snapshot_identifier
from swh.model.model import RevisionType
@pytest.fixture
def expected_data_to_check(swh_config, datadir):
    """Load the expected-load-from-disk.json data to check against a loading visit.

    Both arguments are pytest fixtures:

    - swh_config: writes to disk some configuration the loader is supposed to
      know how to load (see the conftest); it can be overridden here by
      redefining it, but a priori that should not be needed.
    - datadir: absolute path to this test module's "./data" folder, so tests
      can retrieve data files from it.

    Returns:
        dict: the parsed JSON content of "./data/expected-load-from-disk.json".
    """
    pathname = "expected-load-from-disk.json"
    # Expected data is stored at "./data/expected-load-from-disk.json"
    path = os.path.join(datadir, pathname)
    assert os.path.exists(path), f"No file {pathname} found! Could not test."
    # Context manager closes the file handle (the original open() leaked it)
    with open(path, "r") as f:
        return json.load(f)
def test_visit_from_disk(expected_data_to_check, datadir, tmp_path):
    """Loading from disk a repository should yield 1 snapshot.

    The expected counts (contents, directories, persons, revisions) and the
    expected snapshot id are all derived from the expected.json fixture data,
    then compared against what the loader actually wrote to storage.
    """
    archive_path = os.path.join(datadir, "example.tar.gz")
    repo_url = prepare_repository_from_archive(archive_path, "repo", tmp_path)

    loader = HgLoaderFromDisk(repo_url, repo_url)

    expected_data = expected_data_to_check
    # Ids of all "blob" objects -> expected contents
    expected_contents = [
        obj["id"] for obj in expected_data["objects"].values() if obj["type"] == "blob"
    ]
    # Number of "tree" objects -> expected directories
    expected_dir_count = sum(
        1 for obj in expected_data["objects"].values() if obj["type"] == "tree"
    )
    # Distinct commit authors; rsplit(" ", 2)[0] strips the trailing two
    # date/offset tokens, keeping only the "name <email>" part
    expected_person_count = len(
        {
            obj["author"].rsplit(" ", 2)[0]
            for obj in expected_data["objects"].values()
            if obj["type"] == "commit"
        }
    )
    # Mapping: revision id -> root directory id
    expected_revisions = {
        obj["id"]: obj["tree"]
        for obj in expected_data["objects"].values()
        if obj["type"] == "commit"
    }
    expected_snapshot = {"branches": expected_data["branches"]}
    # Compute the snapshot id from its branches. The "HEAD" branch is
    # presumably an alias whose target is a branch *name* (encoded as raw
    # bytes); other targets are hex object ids converted to bytes.
    # TODO(review): confirm against the expected.json branch data.
    expected_snapshot["id"] = snapshot_identifier(
        {
            "branches": {
                name.encode(): {
                    "target": branch["target"].encode()
                    if name == "HEAD"
                    else hash_to_bytes(branch["target"]),
                    "target_type": branch["target_type"],
                }
                for name, branch in expected_snapshot["branches"].items()
            }
        }
    )

    actual_load_status = loader.load()
    assert actual_load_status == {"status": "eventful"}

    stats = get_stats(loader.storage)
    assert stats == {
        "content": len(expected_contents),
        "directory": expected_dir_count,
        "origin": 1,  # 1 origin
        "origin_visit": 1,  # with 1 visit
        "person": expected_person_count,
        "release": 1,
        "revision": len(expected_revisions),
        "skipped_content": 0,
        "snapshot": 1,
    }

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        type=RevisionType.MERCURIAL.value,
        status="full",
        snapshot=hash_to_bytes(expected_snapshot["id"]),
    )

    check_snapshot(expected_snapshot, loader.storage)