Changeset View
Standalone View
swh/loader/tests/__init__.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import subprocess | import subprocess | ||||
from pathlib import PosixPath | from pathlib import PosixPath | ||||
from typing import Dict, Optional, Union | from typing import Any, Dict, Optional, Union | ||||
from swh.model.model import OriginVisitStatus | from swh.model.model import OriginVisitStatus, Snapshot | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes | ||||
from swh.storage.interface import StorageInterface | |||||
from swh.storage.algos.origin import origin_get_latest_visit_status | from swh.storage.algos.origin import origin_get_latest_visit_status | ||||
def assert_last_visit_matches( | def assert_last_visit_matches( | ||||
storage, | storage, | ||||
url: str, | url: str, | ||||
status: str, | status: str, | ||||
type: Optional[str] = None, | type: Optional[str] = None, | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | ) -> str: | ||||
# uncompress folder/repositories/dump for the loader to ingest | # uncompress folder/repositories/dump for the loader to ingest | ||||
subprocess.check_output(["tar", "xf", archive_path, "-C", tmp_path]) | subprocess.check_output(["tar", "xf", archive_path, "-C", tmp_path]) | ||||
# build the origin url (or some derivative form) | # build the origin url (or some derivative form) | ||||
_fname = filename if filename else os.path.basename(archive_path) | _fname = filename if filename else os.path.basename(archive_path) | ||||
repo_url = f"file://{tmp_path}/{_fname}" | repo_url = f"file://{tmp_path}/{_fname}" | ||||
return repo_url | return repo_url | ||||
def decode_target(target): | def encode_target(target: Dict) -> Dict: | ||||
"""Test helper to ease readability in test | """Test helper to ease readability in test | ||||
""" | """ | ||||
if not target: | if not target: | ||||
return target | return target | ||||
target_type = target["target_type"] | target_type = target["target_type"] | ||||
target_data = target["target"] | |||||
if target_type == "alias": | if target_type == "alias" and isinstance(target_data, str): | ||||
decoded_target = target["target"].decode("utf-8") | encoded_target = target_data.encode("utf-8") | ||||
elif isinstance(target_data, str): | |||||
encoded_target = hash_to_bytes(target_data) | |||||
else: | else: | ||||
decoded_target = hash_to_hex(target["target"]) | encoded_target = target_data | ||||
return {"target": decoded_target, "target_type": target_type} | return {"target": encoded_target, "target_type": target_type} | ||||
def check_snapshot(expected_snapshot, storage): | def check_snapshot( | ||||
snapshot: Union[Dict[str, Any], Snapshot], storage: StorageInterface | |||||
): | |||||
"""Check for snapshot match. | """Check for snapshot match. | ||||
Provide the hashes as hexadecimal, the conversion is done | The hashes can be both in hex or bytes, the necessary conversion will happen prior | ||||
within the method. | to check. | ||||
Args: | Args: | ||||
expected_snapshot (dict): full snapshot with hex ids | snapshot: full snapshot to check for existence and consistency | ||||
storage (Storage): expected storage | storage: storage to lookup information into | ||||
douardda: expected_snapshot or snapshot ? | |||||
Done Inline ActionsIt's the expected_snapshot but i renamed it so it can reuse the name internally without mypy annoying me with its type. I forgot to rename the docstring, i'll fix that. ardumont: It's the `expected_snapshot` but i renamed it so it can reuse the name internally without mypy… | |||||
Returns: | Returns: | ||||
the snapshot stored in the storage for further test assertion if any is | the snapshot stored in the storage for further test assertion if any is | ||||
needed. | needed. | ||||
""" | """ | ||||
expected_snapshot_id = expected_snapshot["id"] | if isinstance(snapshot, Snapshot): | ||||
Done Inline Actionsoh i wanted to avoid the isinstance call initially but i recall i can't in the end (mypy)... ardumont: oh i wanted to avoid the isinstance call initially but i recall i can't in the end (mypy)... | |||||
Not Done Inline ActionsSince we try to go full "model object everywhere", why not use the Snapshot within this test rather than the dict form? douardda: Since we try to go full "model object everywhere", why not use the Snapshot within this test… | |||||
Done Inline ActionsBecause I went with the existing code which already manipulated the dict. ardumont: Because I went with the existing code which already manipulated the dict.
(also i try to keep… | |||||
Done Inline ActionsAnd it means more code, for example, i'd need to code the symmetric encode_target here so i can Snapshot.from_dict the all thing. ardumont: And it means more code, for example, i'd need to code the symmetric `encode_target` here so i… | |||||
Done Inline ActionsAnd then convert the snapshot dict returned from the storage endpoint... ardumont: And then convert the snapshot dict returned from the storage endpoint...
(which i think started… | |||||
Done Inline Actionscontext: trying to do as you proposed. Even though unfinished, i found the result quite unreadable in terms of error message... ardumont: context: trying to do as you proposed.
Even though unfinished, i found the result quite… | |||||
expected_branches = expected_snapshot["branches"] | expected_snapshot = snapshot | ||||
snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id)) | elif isinstance(snapshot, dict): | ||||
# dict must be snapshot compliant | |||||
snapshot_dict = {"id": hash_to_bytes(snapshot["id"])} | |||||
Not Done Inline ActionsI think you want a f-string here right ? anlambert: I think you want a f-string here right ? | |||||
Done Inline Actionsyeah (damn!) ardumont: yeah (damn!) | |||||
Not Done Inline ActionsI think this is unnecessary (the else case). This is testing code, if something incorrect is given, it will fail anyway. douardda: I think this is unnecessary (the `else` case). This is testing code, if something incorrect is… | |||||
Done Inline Actionsmight as well be explicit? ardumont: might as well be explicit?
I'm annoyed when i don't understand immediately the assertion… | |||||
branches = {} | |||||
for branch, target in snapshot["branches"].items(): | |||||
if isinstance(branch, str): | |||||
branch = branch.encode("utf-8") | |||||
branches[branch] = encode_target(target) | |||||
snapshot_dict["branches"] = branches | |||||
expected_snapshot = Snapshot.from_dict(snapshot_dict) | |||||
else: | |||||
raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}") | |||||
snap = storage.snapshot_get(expected_snapshot.id) | |||||
if snap is None: | if snap is None: | ||||
raise AssertionError(f"Snapshot {expected_snapshot_id} is not found") | raise AssertionError(f"Snapshot {expected_snapshot.id.hex()} is not found") | ||||
assert snap["next_branch"] is None # we don't deal with large snapshot in tests | |||||
snap.pop("next_branch") | |||||
actual_snap = Snapshot.from_dict(snap) | |||||
assert expected_snapshot == actual_snap | |||||
Not Done Inline Actionsbranch type is always bytes no ? anlambert: branch type is always bytes no ? | |||||
Done Inline Actionsnot necessarily in test. Initially, to simplify the test writing, we allowed to pass dict-like model object with hashes as hex. ardumont: not necessarily in test.
Initially, to simplify the test writing, we allowed to pass dict-like… | |||||
branches = { | return snap # for retro compat, returned the dict, remove when clients are migrated | ||||
branch.decode("utf-8"): decode_target(target) | |||||
for branch, target in snap["branches"].items() | |||||
} | |||||
assert expected_branches == branches | |||||
return snap | |||||
Done Inline ActionsInverted the order to match what we do in other loaders. ardumont: Inverted the order to match what we do in other loaders.
Unsure if it's a good idea though. | |||||
Done Inline ActionsI reverted it. ardumont: I reverted it. | |||||
def get_stats(storage) -> Dict: | def get_stats(storage) -> Dict: | ||||
"""Adaptation utils to unify the stats counters across storage | """Adaptation utils to unify the stats counters across storage | ||||
implementation. | implementation. | ||||
""" | """ | ||||
storage.refresh_stat_counters() | storage.refresh_stat_counters() | ||||
stats = storage.stat_counters() | stats = storage.stat_counters() | ||||
Show All 13 Lines |
expected_snapshot or snapshot ?