diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py --- a/swh/loader/package/archive/tests/test_archive.py +++ b/swh/loader/package/archive/tests/test_archive.py @@ -6,12 +6,12 @@ from swh.model.hashutil import hash_to_bytes from swh.loader.package.archive.loader import ArchiveLoader -from swh.loader.package.tests.common import ( +from swh.loader.package.tests.common import check_metadata_paths +from swh.loader.tests import ( + assert_last_visit_matches, check_snapshot, - check_metadata_paths, get_stats, ) -from swh.loader.tests import assert_last_visit_matches URL = "https://ftp.gnu.org/gnu/8sync/" diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py --- a/swh/loader/package/cran/tests/test_cran.py +++ b/swh/loader/package/cran/tests/test_cran.py @@ -20,11 +20,11 @@ from swh.core.tarball import uncompress from swh.model.model import TimestampWithTimezone -from swh.loader.package.tests.common import ( +from swh.loader.tests import ( + assert_last_visit_matches, check_snapshot, get_stats, ) -from swh.loader.tests import assert_last_visit_matches def test_cran_parse_date(): diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -19,11 +19,11 @@ get_package_metadata, extract_package, ) -from swh.loader.package.tests.common import ( +from swh.loader.tests import ( + assert_last_visit_matches, check_snapshot, get_stats, ) -from swh.loader.tests import assert_last_visit_matches from swh.loader.package.debian.loader import resolve_revision_from diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -11,12 +11,12 @@ from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.loader.package.deposit.loader import DepositLoader -from swh.loader.package.tests.common import ( +from swh.loader.package.tests.common import check_metadata_paths +from swh.loader.tests import ( + assert_last_visit_matches, check_snapshot, - check_metadata_paths, get_stats, ) -from swh.loader.tests import assert_last_visit_matches from swh.core.pytest_plugin import requests_mock_datadir_factory diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -22,14 +22,15 @@ clean_sources, ) -from swh.loader.package.tests.common import ( - get_stats, - check_snapshot, -) from swh.loader.package.utils import download from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.storage.exc import HashCollision -from swh.loader.tests import assert_last_visit_matches + +from swh.loader.tests import ( + assert_last_visit_matches, + get_stats, + check_snapshot, +) sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json" diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py --- a/swh/loader/package/npm/tests/test_npm.py +++ b/swh/loader/package/npm/tests/test_npm.py @@ -15,12 +15,12 @@ extract_npm_package_author, artifact_to_revision_id, ) -from swh.loader.package.tests.common import ( +from swh.loader.package.tests.common import check_metadata_paths +from swh.loader.tests import ( + assert_last_visit_matches, check_snapshot, - check_metadata_paths, get_stats, ) -from swh.loader.tests import assert_last_visit_matches def test_extract_npm_package_author(datadir): diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -23,12 +23,12 @@ extract_intrinsic_metadata, artifact_to_revision_id, ) -from swh.loader.package.tests.common import ( +from swh.loader.package.tests.common import check_metadata_paths +from swh.loader.tests import ( + assert_last_visit_matches, check_snapshot, - check_metadata_paths, get_stats, ) -from swh.loader.tests import assert_last_visit_matches def test_author_basic(): diff --git a/swh/loader/package/tests/common.py b/swh/loader/package/tests/common.py --- a/swh/loader/package/tests/common.py +++ b/swh/loader/package/tests/common.py @@ -9,72 +9,12 @@ from typing import Dict, List, Tuple -from swh.model.hashutil import hash_to_bytes, hash_to_hex - logger = logging.getLogger(__file__) DATADIR = path.join(path.abspath(path.dirname(__file__)), "resources") -def decode_target(target): - """Test helper to ease readability in test - - """ - if not target: - return target - target_type = target["target_type"] - - if target_type == "alias": - decoded_target = target["target"].decode("utf-8") - else: - decoded_target = hash_to_hex(target["target"]) - - return {"target": decoded_target, "target_type": target_type} - - -def check_snapshot(expected_snapshot, storage): - """Check for snapshot match. - - Provide the hashes as hexadecimal, the conversion is done - within the method. - - Args: - expected_snapshot (dict): full snapshot with hex ids - storage (Storage): expected storage - - Returns: - the snapshot stored in the storage for further test assertion if any is - needed. - - """ - expected_snapshot_id = expected_snapshot["id"] - expected_branches = expected_snapshot["branches"] - snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id)) - if snap is None: - # display known snapshots instead if possible - if hasattr(storage, "_snapshots"): # in-mem storage - from pprint import pprint - - for snap_id, (_snap, _) in storage._snapshots.items(): - snapd = _snap.to_dict() - snapd["id"] = hash_to_hex(snapd["id"]) - branches = { - branch.decode("utf-8"): decode_target(target) - for branch, target in snapd["branches"].items() - } - snapd["branches"] = branches - pprint(snapd) - raise AssertionError("Snapshot is not found") - - branches = { - branch.decode("utf-8"): decode_target(target) - for branch, target in snap["branches"].items() - } - assert expected_branches == branches - return snap - - def check_metadata(metadata: Dict, key_path: str, raw_type: str): """Given a metadata dict, ensure the associated key_path value is of type raw_type. @@ -114,25 +54,3 @@ """ for key_path, raw_type in paths: check_metadata(metadata, key_path, raw_type) - - -def get_stats(storage) -> Dict: - """Adaptation utils to unify the stats counters across storage - implementation. - - """ - storage.refresh_stat_counters() - stats = storage.stat_counters() - - keys = [ - "content", - "directory", - "origin", - "origin_visit", - "person", - "release", - "revision", - "skipped_content", - "snapshot", - ] - return {k: stats.get(k) for k in keys} diff --git a/swh/loader/package/tests/test_common.py b/swh/loader/package/tests/test_common.py --- a/swh/loader/package/tests/test_common.py +++ b/swh/loader/package/tests/test_common.py @@ -1,101 +1,14 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest -from swh.model.hashutil import hash_to_bytes -from swh.model.model import Snapshot, SnapshotBranch, TargetType from swh.loader.package.tests.common import ( - decode_target, - check_snapshot, check_metadata, check_metadata_paths, ) -from swh.storage import get_storage - -hash_hex = "43e45d56f88993aae6a0198013efa80716fd8920" - - -storage_config = {"cls": "pipeline", "steps": [{"cls": "memory",}]} - - -def test_decode_target_edge(): - assert not decode_target(None) - - -def test_decode_target(): - actual_alias_decode_target = decode_target( - {"target_type": "alias", "target": b"something",} - ) - - assert actual_alias_decode_target == { - "target_type": "alias", - "target": "something", - } - - actual_decode_target = decode_target( - {"target_type": "revision", "target": hash_to_bytes(hash_hex),} - ) - - assert actual_decode_target == { - "target_type": "revision", - "target": hash_hex, - } - - -def test_check_snapshot(): - storage = get_storage(**storage_config) - - snap_id = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7" - snapshot = Snapshot( - id=hash_to_bytes(snap_id), - branches={ - b"master": SnapshotBranch( - target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, - ), - }, - ) - - s = storage.snapshot_add([snapshot]) - assert s == { - "snapshot:add": 1, - } - - expected_snapshot = { - "id": snap_id, - "branches": {"master": {"target": hash_hex, "target_type": "revision",}}, - } - check_snapshot(expected_snapshot, storage) - - -def test_check_snapshot_failure(): - storage = get_storage(**storage_config) - - snapshot = Snapshot( - id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), - branches={ - b"master": SnapshotBranch( - target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, - ), - }, - ) - - s = storage.snapshot_add([snapshot]) - assert s == { - "snapshot:add": 1, - } - - unexpected_snapshot = { - "id": "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7", - "branches": { - "master": {"target": hash_hex, "target_type": "release",} # wrong value - }, - } - - with pytest.raises(AssertionError): - check_snapshot(unexpected_snapshot, storage) def test_check_metadata(): diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py --- a/swh/loader/tests/__init__.py +++ b/swh/loader/tests/__init__.py @@ -7,8 +7,10 @@ import subprocess from pathlib import PosixPath -from typing import Optional, Union +from typing import Dict, Optional, Union + from swh.model.model import OriginVisitStatus +from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.storage.algos.origin import origin_get_latest_visit_status @@ -77,3 +79,83 @@ _fname = filename if filename else os.path.basename(archive_path) repo_url = f"file://{tmp_path}/{_fname}" return repo_url + + +def decode_target(target): + """Test helper to ease readability in test + + """ + if not target: + return target + target_type = target["target_type"] + + if target_type == "alias": + decoded_target = target["target"].decode("utf-8") + else: + decoded_target = hash_to_hex(target["target"]) + + return {"target": decoded_target, "target_type": target_type} + + +def check_snapshot(expected_snapshot, storage): + """Check for snapshot match. + + Provide the hashes as hexadecimal, the conversion is done + within the method. + + Args: + expected_snapshot (dict): full snapshot with hex ids + storage (Storage): expected storage + + Returns: + the snapshot stored in the storage for further test assertion if any is + needed. + + """ + expected_snapshot_id = expected_snapshot["id"] + expected_branches = expected_snapshot["branches"] + snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id)) + if snap is None: + # display known snapshots instead if possible + if hasattr(storage, "_snapshots"): # in-mem storage + from pprint import pprint + + for snap_id, (_snap, _) in storage._snapshots.items(): + snapd = _snap.to_dict() + snapd["id"] = hash_to_hex(snapd["id"]) + branches = { + branch.decode("utf-8"): decode_target(target) + for branch, target in snapd["branches"].items() + } + snapd["branches"] = branches + pprint(snapd) + raise AssertionError("Snapshot is not found") + + branches = { + branch.decode("utf-8"): decode_target(target) + for branch, target in snap["branches"].items() + } + assert expected_branches == branches + return snap + + +def get_stats(storage) -> Dict: + """Adaptation utils to unify the stats counters across storage + implementation. + + """ + storage.refresh_stat_counters() + stats = storage.stat_counters() + + keys = [ + "content", + "directory", + "origin", + "origin_visit", + "person", + "release", + "revision", + "skipped_content", + "snapshot", + ] + return {k: stats.get(k) for k in keys} diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py --- a/swh/loader/tests/test_init.py +++ b/swh/loader/tests/test_init.py @@ -9,10 +9,28 @@ import os import subprocess +from swh.storage import get_storage + from swh.loader.tests import prepare_repository_from_archive, assert_last_visit_matches -from swh.model.model import OriginVisit, OriginVisitStatus +from swh.model.model import ( + OriginVisit, + OriginVisitStatus, + Snapshot, + SnapshotBranch, + TargetType, +) from swh.model.hashutil import hash_to_bytes +from swh.loader.tests import ( + decode_target, + check_snapshot, +) + + +hash_hex = "43e45d56f88993aae6a0198013efa80716fd8920" + + +storage_config = {"cls": "pipeline", "steps": [{"cls": "memory",}]} ORIGIN_VISIT = OriginVisit( origin="some-url", @@ -156,3 +174,80 @@ # passing along the filename does not influence the on-disk extraction # just the repo-url computation assert os.path.exists(expected_uncompressed_archive_path) + + +def test_decode_target_edge(): + assert not decode_target(None) + + +def test_decode_target(): + actual_alias_decode_target = decode_target( + {"target_type": "alias", "target": b"something",} + ) + + assert actual_alias_decode_target == { + "target_type": "alias", + "target": "something", + } + + actual_decode_target = decode_target( + {"target_type": "revision", "target": hash_to_bytes(hash_hex),} + ) + + assert actual_decode_target == { + "target_type": "revision", + "target": hash_hex, + } + + +def test_check_snapshot(): + storage = get_storage(**storage_config) + + snap_id = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7" + snapshot = Snapshot( + id=hash_to_bytes(snap_id), + branches={ + b"master": SnapshotBranch( + target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, + ), + }, + ) + + s = storage.snapshot_add([snapshot]) + assert s == { + "snapshot:add": 1, + } + + expected_snapshot = { + "id": snap_id, + "branches": {"master": {"target": hash_hex, "target_type": "revision",}}, + } + check_snapshot(expected_snapshot, storage) + + +def test_check_snapshot_failure(): + storage = get_storage(**storage_config) + + snapshot = Snapshot( + id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"master": SnapshotBranch( + target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, + ), + }, + ) + + s = storage.snapshot_add([snapshot]) + assert s == { + "snapshot:add": 1, + } + + unexpected_snapshot = { + "id": "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7", + "branches": { + "master": {"target": hash_hex, "target_type": "release",} # wrong value + }, + } + + with pytest.raises(AssertionError): + check_snapshot(unexpected_snapshot, storage)