Changeset View
Standalone View
swh/loader/tests/__init__.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import subprocess | import subprocess | ||||
from collections import defaultdict | |||||
from pathlib import PosixPath | from pathlib import PosixPath | ||||
from typing import Any, Dict, Optional, Union | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | ||||
from swh.model.model import OriginVisitStatus, Snapshot | from swh.model.model import OriginVisitStatus, Snapshot | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from swh.storage.algos.origin import origin_get_latest_visit_status | from swh.storage.algos.origin import origin_get_latest_visit_status | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | def decode_target(target: Dict) -> Dict: | ||||
elif isinstance(target_data, bytes): | elif isinstance(target_data, bytes): | ||||
decoded_target = hash_to_hex(target_data) | decoded_target = hash_to_hex(target_data) | ||||
else: | else: | ||||
decoded_target = target_data | decoded_target = target_data | ||||
return {"target": decoded_target, "target_type": target_type} | return {"target": decoded_target, "target_type": target_type} | ||||
class InconsistentAliasBranchError(AssertionError):
    """Raised when an alias branch targets a nonexistent branch."""
class InexistentObjectsError(AssertionError):
    """Raised when an object targeted by a branch does not exist in the storage."""
vlorentz: way too many newlines here
ardumont (author): What's the change proposal? (I did not get the implied solution)
vlorentz:
    class InconsistentAliasBranchError(AssertionError):
        """When an alias branch targets an inexistent branch."""

        pass
ardumont (author): heh, right, though the pass will get spaced by black (I think) ;)
def _assert_objects_exist(
    kind: str,
    object_ids: List[bytes],
    stored_objects: Iterable[Any],
    object_to_branch: Dict[bytes, str],
) -> None:
    """Raise InexistentObjectsError if any looked-up object is missing.

    Args:
        kind: label used in the error message ("Revision" or "Release")
        object_ids: identifiers that were looked up in the storage
        stored_objects: lookup results, paired positionally with ``object_ids``;
          a ``None`` entry marks an object that was not found
        object_to_branch: maps an object identifier to the name of a branch
          targeting it, used to build the error message

    Raises:
        InexistentObjectsError: when at least one entry of ``stored_objects`` is
          None

    """
    missing = [
        obj_id for obj_id, obj in zip(object_ids, stored_objects) if obj is None
    ]
    if missing:
        missing_objs = ",".join(
            str((object_to_branch[obj_id], obj_id.hex())) for obj_id in missing
        )
        raise InexistentObjectsError(
            f"Branch/{kind}(s) {missing_objs} should exist in storage"
        )


def check_snapshot(
    snapshot: Union[Dict[str, Any], Snapshot],
    storage: StorageInterface,
    safelist: Iterable[Tuple[str, str]] = (),
):
    """Check that:
    - the snapshot exists in the storage and its branches match the expected ones
    - each alias branch targets a branch present in the stored snapshot
    - each revision/release targeted by a branch exists in the storage

    Args:
        snapshot: full snapshot to check for existence and consistency, either as a
          Snapshot model object or as its dict representation
        storage: storage to lookup information into
        safelist: Iterable of (target_type, branch name) pairs whose target is
          allowed to be missing from the storage (some edge case loaders allow this
          case to happen, nixguix for example allows the evaluation branch to not be
          resolvable)

    Returns:
        the snapshot dict stored in the storage (with its "next_branch" key
        removed) for further test assertion if any is needed.

    Raises:
        AssertionError: when ``snapshot`` is neither a Snapshot nor a dict, when the
          snapshot is not found in the storage, or when the stored branches differ
          from the expected ones
        InconsistentAliasBranchError: when an alias targets a branch which is not
          part of the stored snapshot
        InexistentObjectsError: when a targeted revision or release is not found in
          the storage

    """
    if isinstance(snapshot, Snapshot):
        expected_snapshot = snapshot.to_dict()
    elif isinstance(snapshot, dict):
        expected_snapshot = snapshot
    else:
        raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}")

    expected_snapshot_id = expected_snapshot["id"]
    raw_expected_branches = expected_snapshot["branches"]

    snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id))
    if snap is None:
        raise AssertionError(f"Snapshot {expected_snapshot_id} is not found")

    # Drop "next_branch" before building the model object (presumably it is not a
    # Snapshot field -- TODO confirm). The default avoids a KeyError when the
    # storage does not return that key.
    snap.pop("next_branch", None)
    stored_snapshot = Snapshot.from_dict(snap)
    assert isinstance(stored_snapshot, Snapshot)

    # Materialize once: ``safelist`` may be a one-shot iterable and is tested for
    # membership on every branch below.
    allowed_missing = tuple(safelist)

    expected_branches = {}
    for branch, target in raw_expected_branches.items():
        if isinstance(branch, bytes):
            branch = branch.decode("utf-8")
        expected_branches[branch] = decode_target(target)

    branches_by_target_type: Dict[str, List[bytes]] = defaultdict(list)
    object_to_branch: Dict[bytes, str] = {}
    snapshot_branches = {}
    for branch, target in snap["branches"].items():
        if isinstance(branch, bytes):
            branch = branch.decode("utf-8")
        snapshot_branches[branch] = decode_target(target)
        target_type = target["target_type"]
        if (target_type, branch) in allowed_missing:
            # safe for those elements to not be checked for existence
            continue
        branches_by_target_type[target_type].append(target["target"])
        object_to_branch[target["target"]] = branch

    # Check the stored snapshot is compliant regarding expectations
    assert expected_branches == snapshot_branches

    # Check that no alias branch targets a branch missing from the snapshot
    for alias in branches_by_target_type.get("alias", []):
        if alias not in stored_snapshot.branches:
            raise InconsistentAliasBranchError(
                f"Alias branch {alias.decode('utf-8')} "
                f"should be in {list(stored_snapshot.branches)}"
            )

    # Check that every revision targeted by a branch exists in the storage
    revs = branches_by_target_type.get("revision")
    if revs:
        _assert_objects_exist(
            "Revision", revs, storage.revision_get(revs), object_to_branch
        )

    # Check that every release targeted by a branch exists in the storage
    rels = branches_by_target_type.get("release")
    if rels:
        _assert_objects_exist(
            "Release", rels, storage.release_get(rels), object_to_branch
        )

    return snap
def get_stats(storage) -> Dict: | def get_stats(storage) -> Dict: | ||||
"""Adaptation utils to unify the stats counters across storage | """Adaptation utils to unify the stats counters across storage | ||||
implementation. | implementation. | ||||
""" | """ | ||||
Show All 15 Lines |
way too many newlines here