diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py --- a/swh/loader/tests/__init__.py +++ b/swh/loader/tests/__init__.py @@ -6,8 +6,9 @@ import os import subprocess +from collections import defaultdict from pathlib import PosixPath -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, List, Optional, Union from swh.model.model import OriginVisitStatus, Snapshot from swh.model.hashutil import hash_to_bytes, hash_to_hex @@ -100,16 +101,31 @@ return {"target": decoded_target, "target_type": target_type} +class InconsistentAliasBranchError(AssertionError): + """When an alias branch targets an inexistent branch. + + """ + + pass + + +class InexistentObjectsError(AssertionError): + """When a targeted branch reference does not exist in the storage + + """ + + pass + + def check_snapshot( snapshot: Union[Dict[str, Any], Snapshot], storage: StorageInterface ): - """Check for snapshot match. - - The hashes can be both in hex or bytes, the necessary conversion will happen prior - to check. + """Check that: + - snapshot exists in the storage and match + - each object reference up to the revision/release targets exists Args: - expected_snapshot: full snapshot to check for existence and consistency + snapshot: full snapshot to check for existence and consistency storage: storage to lookup information into Returns: @@ -129,7 +145,13 @@ snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id)) if snap is None: raise AssertionError(f"Snapshot {expected_snapshot_id} is not found") + snap.pop("next_branch") + snapshot = Snapshot.from_dict(snap) + assert isinstance(snapshot, Snapshot) + + objects = defaultdict(list) + # recursively check the objects referenced by the snapshots exists expected_branches = {} for branch, target in expected_snapshot["branches"].items(): if isinstance(branch, bytes): @@ -141,7 +163,43 @@ if isinstance(branch, bytes): branch = branch.decode("utf-8") snapshot_branches[branch] = decode_target(target) + target_type = target["target_type"] + objects[target_type].append(target["target"]) + + # Check snapshot is compliant regarding expectations assert expected_branches == snapshot_branches + + # Now check for more consistency + aliases: List[bytes] = objects.get("alias", []) + for alias in aliases: + # ensure the snapshot alias target consistent references + if alias not in snapshot.branches: + raise InconsistentAliasBranchError( + f"Alias branch {alias.decode('utf-8')} " + f"should be in {list(snapshot.branches)}" + ) + + revs = objects.get("revision") + if revs: + revisions = list(storage.revision_get(revs)) + not_found = [rev_id for rev_id, rev in zip(revs, revisions) if rev is None] + if not_found: + + raise InexistentObjectsError( + f"Revision(s) {','.join(r.hex() for r in not_found)}" + " should exist in storage" + ) + + rels = objects.get("release") + if rels: + releases = list(storage.release_get(rels)) + not_found = [rel_id for rel_id, rel in zip(rels, releases) if rel is None] + if not_found: + raise InexistentObjectsError( + f"Release(s) {','.join(r.hex() for r in not_found)}" + " should exist in storage" + ) + return snap diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py --- a/swh/loader/tests/test_init.py +++ b/swh/loader/tests/test_init.py @@ -9,10 +9,21 @@ import os import subprocess -from swh.loader.tests import prepare_repository_from_archive, assert_last_visit_matches +from swh.loader.tests import ( + prepare_repository_from_archive, + assert_last_visit_matches, + InconsistentAliasBranchError, + InexistentObjectsError, +) +from swh.model.from_disk import DentryPerms + from swh.model.model import ( + Content, + Directory, OriginVisit, OriginVisitStatus, + Release, + Revision, Snapshot, SnapshotBranch, TargetType, @@ -46,6 +57,113 @@ ) +CONTENT = Content.from_dict( + { + "data": b"42\n", + "length": 3, + "sha1": hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"), + "sha1_git": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), + "sha256": hash_to_bytes( + "673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a" + ), + "blake2s256": hash_to_bytes( + "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d" + ), + "status": "visible", + } +) + + +DIRECTORY = Directory.from_dict( + { + "id": hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"), + "entries": [ + { + "name": b"foo", + "type": "file", + "target": CONTENT.sha1_git, + "perms": DentryPerms.content, + } + ], + } +) + + +REVISION = Revision.from_dict( + { + "id": hash_to_bytes("066b1b62dbfa033362092af468bf6cfabec230e7"), + "message": b"hello", + "author": { + "name": b"Nicolas Dandrimont", + "email": b"nicolas@example.com", + "fullname": b"Nicolas Dandrimont ", + }, + "date": { + "timestamp": {"seconds": 1234567890, "microseconds": 0}, + "offset": 120, + "negative_utc": False, + }, + "committer": { + "name": b"St\xc3fano Zacchiroli", + "email": b"stefano@example.com", + "fullname": b"St\xc3fano Zacchiroli ", + }, + "committer_date": { + "timestamp": {"seconds": 1123456789, "microseconds": 0}, + "offset": 0, + "negative_utc": True, + }, + "parents": (), + "type": "git", + "directory": DIRECTORY.id, + "metadata": { + "checksums": {"sha1": "tarball-sha1", "sha256": "tarball-sha256",}, + "signed-off-by": "some-dude", + }, + "extra_headers": ( + (b"gpgsig", b"test123"), + (b"mergetag", b"foo\\bar"), + (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"), + ), + "synthetic": True, + } +) + + +RELEASE = Release.from_dict( + { + "id": hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"), + "name": b"v0.0.2", + "author": { + "name": b"tony", + "email": b"tony@ardumont.fr", + "fullname": b"tony ", + }, + "date": { + "timestamp": {"seconds": 1634336813, "microseconds": 0}, + "offset": 0, + "negative_utc": False, + }, + "target": REVISION.id, + "target_type": "revision", + "message": b"yet another synthetic release", + "synthetic": True, + } +) + + +SNAPSHOT = Snapshot( + id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"release/0.1.0": SnapshotBranch( + target=RELEASE.id, target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch(target=REVISION.id, target_type=TargetType.REVISION,), + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + }, +) + + @pytest.fixture def mock_storage(mocker): mock_storage = mocker.patch("swh.loader.tests.origin_get_latest_visit_status") @@ -198,27 +316,56 @@ def test_check_snapshot(swh_storage): - """Check snapshot should not raise when everything is fine""" - snapshot = Snapshot( - id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), - branches={ - b"master": SnapshotBranch( - target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, - ), - }, - ) + """Everything should be fine when snapshot is found and the snapshot reference up to the + revision exist in the storage. - s = swh_storage.snapshot_add([snapshot]) + """ + # Create a consistent snapshot arborescence tree in storage + found = False + for entry in DIRECTORY.entries: + if entry.target == CONTENT.sha1_git: + found = True + break + assert found is True + + assert REVISION.directory == DIRECTORY.id + assert RELEASE.target == REVISION.id + + for branch, target in SNAPSHOT.branches.items(): + if branch == b"alias": + assert target.target in SNAPSHOT.branches + else: + assert target.target in [REVISION.id, RELEASE.id] + + swh_storage.content_add([CONTENT.to_dict()]) + swh_storage.directory_add([DIRECTORY.to_dict()]) + swh_storage.revision_add([REVISION.to_dict()]) + swh_storage.release_add([RELEASE.to_dict()]) + s = swh_storage.snapshot_add([SNAPSHOT.to_dict()]) assert s == { "snapshot:add": 1, } - for snap in [snapshot, snapshot.to_dict()]: + for snap in [SNAPSHOT, SNAPSHOT.to_dict()]: + # all should be fine! check_snapshot(snap, swh_storage) -def test_check_snapshot_failure(swh_storage): - """check_snapshot should raise if something goes wrong""" +def test_check_snapshot_failures(swh_storage): + """Failure scenarios: + + 0. snapshot parameter is not a snapshot + 1. snapshot id is correct but branches mismatched + 2. snapshot id is not correct, it's not found in the storage + 3. snapshot reference an alias which does not exist + 4. snapshot is found in storage, targeted revision does not exist + 5. snapshot is found in storage, targeted release does not exist + + The following are not dealt with yet: + 6. snapshot is found in storage, targeted directory does not exist + 7. snapshot is found in storage, targeted content does not exist + + """ snap_id_hex = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7" snapshot = Snapshot( id=hash_to_bytes(snap_id_hex), @@ -241,19 +388,69 @@ }, } - # id is correct, the branch is wrong, that should raise nonetheless + # 0. not a Snapshot object, raise! + with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"): + check_snapshot(ORIGIN_VISIT, swh_storage) + + # 1. snapshot id is correct but branches mismatched for snap_id in [snap_id_hex, snapshot.id]: with pytest.raises(AssertionError, match="Differing items"): unexpected_snapshot["id"] = snap_id check_snapshot(unexpected_snapshot, swh_storage) - # snapshot id which does not exist + # 2. snapshot id is not correct, it's not found in the storage wrong_snap_id_hex = "999666f535f882bc7f9a18fb16c9ad27fda7bab7" for snap_id in [wrong_snap_id_hex, hash_to_bytes(wrong_snap_id_hex)]: unexpected_snapshot["id"] = wrong_snap_id_hex with pytest.raises(AssertionError, match="is not found"): check_snapshot(unexpected_snapshot, swh_storage) - # not a Snapshot object, raise! - with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"): - check_snapshot(ORIGIN_VISIT, swh_storage) + # 3. snapshot reference an alias which does not exist + + snapshot0 = Snapshot( + id=hash_to_bytes("123666f535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + }, + ) + swh_storage.snapshot_add([snapshot0]) + + with pytest.raises(InconsistentAliasBranchError, match="Alias branch HEAD"): + check_snapshot(snapshot0, swh_storage) + + # 4. snapshot is found in storage, targeted revision does not exist + snapshot1 = Snapshot( + id=hash_to_bytes("456666f535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + b"HEAD": SnapshotBranch( + target=REVISION.id, target_type=TargetType.REVISION, + ), + }, + ) + + swh_storage.snapshot_add([snapshot1]) + + with pytest.raises(InexistentObjectsError, match="Revision"): + check_snapshot(snapshot1, swh_storage) + + swh_storage.revision_add([REVISION.to_dict()]) + snapshot2 = Snapshot( + id=hash_to_bytes("789666f535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + b"HEAD": SnapshotBranch( + target=REVISION.id, target_type=TargetType.REVISION, + ), + b"release/0.1.0": SnapshotBranch( + target=RELEASE.id, target_type=TargetType.RELEASE, + ), + }, + ) + + swh_storage.snapshot_add([snapshot2]) + + with pytest.raises(InexistentObjectsError, match="Release"): + check_snapshot(snapshot2, swh_storage) + + swh_storage.release_add([RELEASE.to_dict()])