Changeset View
Standalone View
swh/loader/tests/__init__.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import subprocess | import subprocess | ||||
from collections import defaultdict | |||||
from pathlib import PosixPath | from pathlib import PosixPath | ||||
from typing import Any, Dict, Optional, Union | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | ||||
from swh.model.model import OriginVisitStatus, Snapshot | from swh.model.model import OriginVisitStatus, Snapshot, TargetType | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from swh.storage.algos.origin import origin_get_latest_visit_status | from swh.storage.algos.origin import origin_get_latest_visit_status | ||||
def assert_last_visit_matches( | def assert_last_visit_matches( | ||||
storage, | storage, | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | if target_type == "alias" and isinstance(target_data, str): | ||||
encoded_target = target_data.encode("utf-8") | encoded_target = target_data.encode("utf-8") | ||||
elif isinstance(target_data, str): | elif isinstance(target_data, str): | ||||
encoded_target = hash_to_bytes(target_data) | encoded_target = hash_to_bytes(target_data) | ||||
else: | else: | ||||
encoded_target = target_data | encoded_target = target_data | ||||
return {"target": encoded_target, "target_type": target_type} | return {"target": encoded_target, "target_type": target_type} | ||||
class InconsistentAliasBranchError(AssertionError):
    """Raised when an alias branch targets a branch missing from the snapshot."""
class InexistentObjectsError(AssertionError):
    """Raised when an object targeted by a branch does not exist in the storage."""
def _assert_targets_exist(
    kind: str,
    target_ids: List[bytes],
    stored_objects: Iterable[Any],
    object_to_branch: Dict[bytes, bytes],
) -> None:
    """Raise InexistentObjectsError if any id in target_ids has no stored object.

    Args:
        kind: label used in the error message ("Revision" or "Release")
        target_ids: ids that were looked up in the storage
        stored_objects: lookup results, in the same order as target_ids, with
            None standing for an id that was not found
        object_to_branch: maps a target id to the branch name pointing at it

    """
    not_found = [
        target_id
        for target_id, stored in zip(target_ids, stored_objects)
        if stored is None
    ]
    if not not_found:
        return
    missing_objs = ", ".join(
        str((object_to_branch[target_id], target_id.hex())) for target_id in not_found
    )
    raise InexistentObjectsError(
        f"Branch/{kind}(s) {missing_objs} should exist in storage"
    )


def check_snapshot(
    snapshot: Union[Dict[str, Any], Snapshot],
    storage: StorageInterface,
    allowed_empty: Iterable[Tuple[TargetType, bytes]] = (),
):
    """Check that:
    - the snapshot exists in the storage and matches the expected one
    - each alias branch targets a branch that exists in the snapshot
    - each revision/release targeted by a branch exists in the storage

    When the snapshot is given as a dict, hashes can be either hex strings or
    bytes; the necessary conversion happens prior to the check.

    Args:
        snapshot: full snapshot to check for existence and consistency
        storage: storage to lookup information into
        allowed_empty: iterable of (target type, branch name) pairs whose target
          is allowed to be missing from the storage (some edge-case loaders allow
          this to happen; nixguix, for example, allows the "evaluation" branch to
          target the nixpkgs git commit reference, which may not yet be resolvable
          at loading time)

    Returns:
        the snapshot dict stored in the storage (without its "next_branch" key)
        for further test assertion if any is needed.

    Raises:
        AssertionError: if `snapshot` is neither a Snapshot nor a dict, if the
          snapshot is not found in the storage, or if the stored snapshot differs
          from the expected one
        InconsistentAliasBranchError: if an alias targets a branch which is not
          part of the snapshot
        InexistentObjectsError: if a targeted revision or release is not found in
          the storage

    """
    if isinstance(snapshot, Snapshot):
        expected_snapshot = snapshot
    elif isinstance(snapshot, dict):
        # dict must be snapshot compliant
        expected_dict: Dict[str, Any] = {"id": hash_to_bytes(snapshot["id"])}
        branches = {}
        for branch, target in snapshot["branches"].items():
            if isinstance(branch, str):
                branch = branch.encode("utf-8")
            branches[branch] = encode_target(target)
        expected_dict["branches"] = branches
        expected_snapshot = Snapshot.from_dict(expected_dict)
    else:
        raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}")

    snapshot_dict = storage.snapshot_get(expected_snapshot.id)
    if snapshot_dict is None:
        raise AssertionError(f"Snapshot {expected_snapshot.id.hex()} is not found")

    # "next_branch" is dropped before the remaining keys are fed to
    # Snapshot.from_dict
    snapshot_dict.pop("next_branch")
    actual_snapshot = Snapshot.from_dict(snapshot_dict)
    assert isinstance(actual_snapshot, Snapshot)

    assert expected_snapshot == actual_snapshot

    # Materialize once: `allowed_empty` may be a one-shot iterator, and it is
    # membership-tested for every branch below.
    skipped = tuple(allowed_empty)

    branches_by_target_type = defaultdict(list)
    object_to_branch = {}
    for branch, target in actual_snapshot.branches.items():
        if (target.target_type, branch) in skipped:
            # safe for those elements to not be checked for existence
            continue
        branches_by_target_type[target.target_type].append(target.target)
        object_to_branch[target.target] = branch

    # check that alias references target something that exists, otherwise raise
    aliases: List[bytes] = branches_by_target_type.get(TargetType.ALIAS, [])
    for alias in aliases:
        if alias not in actual_snapshot.branches:
            raise InconsistentAliasBranchError(
                f"Alias branch {alias.decode('utf-8')} "
                f"should be in {list(actual_snapshot.branches)}"
            )

    revs = branches_by_target_type.get(TargetType.REVISION)
    if revs:
        _assert_targets_exist(
            "Revision", revs, storage.revision_get(revs), object_to_branch
        )

    rels = branches_by_target_type.get(TargetType.RELEASE)
    if rels:
        _assert_targets_exist(
            "Release", rels, storage.release_get(rels), object_to_branch
        )

    # for retro compat, returned the dict, remove when clients are migrated
    return snapshot_dict
def get_stats(storage) -> Dict: | def get_stats(storage) -> Dict: | ||||
"""Adaptation utils to unify the stats counters across storage | """Adaptation utils to unify the stats counters across storage | ||||
implementation. | implementation. | ||||
""" | """ | ||||
storage.refresh_stat_counters() | storage.refresh_stat_counters() | ||||
Show All 14 Lines |
way too many newlines here