diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -10,11 +10,12 @@ import pytest from json.decoder import JSONDecodeError -from typing import Dict, Optional, Tuple +from swh.storage.interface import StorageInterface +from typing import Any, Dict, Iterable, Optional, Tuple, Union from unittest.mock import patch -from swh.model.model import Snapshot +from swh.model.model import Snapshot, TargetType from swh.loader.package.archive.loader import ArchiveLoader from swh.loader.package.nixguix.loader import ( NixGuixLoader, @@ -29,13 +30,23 @@ from swh.loader.tests import ( assert_last_visit_matches, get_stats, - check_snapshot, + check_snapshot as check_snapshot_full, ) sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json" +def check_snapshot( + snapshot: Union[Dict[str, Any], Snapshot], + storage: StorageInterface, + allowed_empty: Iterable[Tuple[TargetType, bytes]] = [ + (TargetType.REVISION, b"evaluation") + ], +): + return check_snapshot_full(snapshot, storage, allowed_empty) + + def test_retrieve_sources(swh_config, requests_mock_datadir): j = retrieve_sources(sources_url) assert "sources" in j.keys() diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py --- a/swh/loader/tests/__init__.py +++ b/swh/loader/tests/__init__.py @@ -6,10 +6,11 @@ import os import subprocess +from collections import defaultdict from pathlib import PosixPath -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union -from swh.model.model import OriginVisitStatus, Snapshot +from swh.model.model import OriginVisitStatus, Snapshot, TargetType from swh.model.hashutil import hash_to_bytes from swh.storage.interface import StorageInterface @@ -100,17 +101,34 @@ return {"target": encoded_target, "target_type": target_type} +class InconsistentAliasBranchError(AssertionError): + """When an alias branch targets an inexistent branch.""" + + pass + + +class InexistentObjectsError(AssertionError): + """When a targeted branch reference does not exist in the storage""" + + pass + + def check_snapshot( - snapshot: Union[Dict[str, Any], Snapshot], storage: StorageInterface + snapshot: Union[Dict[str, Any], Snapshot], + storage: StorageInterface, + allowed_empty: Iterable[Tuple[TargetType, bytes]] = [], ): - """Check for snapshot match. - - The hashes can be both in hex or bytes, the necessary conversion will happen prior - to check. + """Check that: + - snapshot exists in the storage and match + - each object reference up to the revision/release targets exists Args: snapshot: full snapshot to check for existence and consistency storage: storage to lookup information into + allowed_empty: Iterable of branch we allow to be empty (some edge case loaders + allows this case to happen, nixguix for example allows the branch evaluation" + to target the nixpkgs git commit reference, which may not yet be resolvable at + loading time) Returns: the snapshot stored in the storage for further test assertion if any is @@ -132,17 +150,60 @@ else: raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}") - snap = storage.snapshot_get(expected_snapshot.id) - if snap is None: + snapshot_dict = storage.snapshot_get(expected_snapshot.id) + if snapshot_dict is None: raise AssertionError(f"Snapshot {expected_snapshot.id.hex()} is not found") - assert snap["next_branch"] is None # we don't deal with large snapshot in tests - snap.pop("next_branch") - actual_snap = Snapshot.from_dict(snap) - - assert expected_snapshot == actual_snap - - return snap # for retro compat, returned the dict, remove when clients are migrated + snapshot_dict.pop("next_branch") + actual_snaphot = Snapshot.from_dict(snapshot_dict) + assert isinstance(actual_snaphot, Snapshot) + + assert expected_snapshot == actual_snaphot + + branches_by_target_type = defaultdict(list) + object_to_branch = {} + for branch, target in actual_snaphot.branches.items(): + if (target.target_type, branch) in allowed_empty: + # safe for those elements to not be checked for existence + continue + branches_by_target_type[target.target_type].append(target.target) + object_to_branch[target.target] = branch + + # check that alias references target something that exists, otherwise raise + aliases: List[bytes] = branches_by_target_type.get(TargetType.ALIAS, []) + for alias in aliases: + if alias not in actual_snaphot.branches: + raise InconsistentAliasBranchError( + f"Alias branch {alias.decode('utf-8')} " + f"should be in {list(actual_snaphot.branches)}" + ) + + revs = branches_by_target_type.get(TargetType.REVISION) + if revs: + revisions = list(storage.revision_get(revs)) + not_found = [rev_id for rev_id, rev in zip(revs, revisions) if rev is None] + if not_found: + missing_objs = ", ".join( + str((object_to_branch[rev], rev.hex())) for rev in not_found + ) + raise InexistentObjectsError( + f"Branch/Revision(s) {missing_objs} should exist in storage" + ) + + rels = branches_by_target_type.get(TargetType.RELEASE) + if rels: + releases = list(storage.release_get(rels)) + not_found = [rel_id for rel_id, rel in zip(rels, releases) if rel is None] + if not_found: + missing_objs = ", ".join( + str((object_to_branch[rel], rel.hex())) for rel in not_found + ) + raise InexistentObjectsError( + f"Branch/Release(s) {missing_objs} should exist in storage" + ) + + # for retro compat, returned the dict, remove when clients are migrated + return snapshot_dict def get_stats(storage) -> Dict: diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py --- a/swh/loader/tests/test_init.py +++ b/swh/loader/tests/test_init.py @@ -9,12 +9,23 @@ import os import subprocess +from swh.model.from_disk import DentryPerms from swh.model.model import ( + Content, + Directory, + DirectoryEntry, + ObjectType, OriginVisit, OriginVisitStatus, + Person, + Release, + Revision, + RevisionType, Snapshot, SnapshotBranch, TargetType, + Timestamp, + TimestampWithTimezone, ) from swh.model.hashutil import hash_to_bytes @@ -23,6 +34,8 @@ encode_target, check_snapshot, prepare_repository_from_archive, + InconsistentAliasBranchError, + InexistentObjectsError, ) @@ -47,6 +60,109 @@ ) +CONTENT = Content( + data=b"42\n", + length=3, + sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"), + sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), + sha256=hash_to_bytes( + "673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a" + ), + blake2s256=hash_to_bytes( + "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d" + ), + status="visible", +) + + +DIRECTORY = Directory( + id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"), + entries=tuple( + [ + DirectoryEntry( + name=b"foo", + type="file", + target=CONTENT.sha1_git, + perms=DentryPerms.content, + ) + ] + ), +) + + +REVISION = Revision( + id=hash_to_bytes("066b1b62dbfa033362092af468bf6cfabec230e7"), + message=b"hello", + author=Person( + name=b"Nicolas Dandrimont", + email=b"nicolas@example.com", + fullname=b"Nicolas Dandrimont ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567890, microseconds=0), + offset=120, + negative_utc=False, + ), + committer=Person( + name=b"St\xc3fano Zacchiroli", + email=b"stefano@example.com", + fullname=b"St\xc3fano Zacchiroli ", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1123456789, microseconds=0), + offset=0, + negative_utc=True, + ), + parents=(), + type=RevisionType.GIT, + directory=DIRECTORY.id, + metadata={ + "checksums": {"sha1": "tarball-sha1", "sha256": "tarball-sha256",}, + "signed-off-by": "some-dude", + }, + extra_headers=( + (b"gpgsig", b"test123"), + (b"mergetag", b"foo\\bar"), + (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"), + ), + synthetic=True, +) + + +RELEASE = Release( + id=hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"), + name=b"v0.0.2", + author=Person( + name=b"tony", email=b"tony@ardumont.fr", fullname=b"tony ", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1634336813, microseconds=0), + offset=0, + negative_utc=False, + ), + target=REVISION.id, + target_type=ObjectType.REVISION, + message=b"yet another synthetic release", + synthetic=True, +) + + +SNAPSHOT = Snapshot( + id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"release/0.1.0": SnapshotBranch( + target=RELEASE.id, target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch(target=REVISION.id, target_type=TargetType.REVISION,), + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + b"evaluation": SnapshotBranch( # branch dedicated to not exist in storage + target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), + target_type=TargetType.REVISION, + ), + }, +) + + @pytest.fixture def mock_storage(mocker): mock_storage = mocker.patch("swh.loader.tests.origin_get_latest_visit_status") @@ -197,27 +313,61 @@ def test_check_snapshot(swh_storage): - """Check snapshot should not raise when everything is fine""" - snapshot = Snapshot( - id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), - branches={ - b"master": SnapshotBranch( - target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, - ), - }, - ) + """Everything should be fine when snapshot is found and the snapshot reference up to the + revision exist in the storage. - s = swh_storage.snapshot_add([snapshot]) + """ + # Create a consistent snapshot arborescence tree in storage + found = False + for entry in DIRECTORY.entries: + if entry.target == CONTENT.sha1_git: + found = True + break + assert found is True + + assert REVISION.directory == DIRECTORY.id + assert RELEASE.target == REVISION.id + + for branch, target in SNAPSHOT.branches.items(): + if branch == b"alias": + assert target.target in SNAPSHOT.branches + elif branch == b"evaluation": + # this one does not exist and we are safelisting its check below + continue + else: + assert target.target in [REVISION.id, RELEASE.id] + + swh_storage.content_add([CONTENT.to_dict()]) + swh_storage.directory_add([DIRECTORY.to_dict()]) + swh_storage.revision_add([REVISION.to_dict()]) + swh_storage.release_add([RELEASE.to_dict()]) + s = swh_storage.snapshot_add([SNAPSHOT.to_dict()]) assert s == { "snapshot:add": 1, } - for snap in [snapshot, snapshot.to_dict()]: - check_snapshot(snap, swh_storage) + for snap in [SNAPSHOT, SNAPSHOT.to_dict()]: + # all should be fine! + check_snapshot( + snap, swh_storage, allowed_empty=[(TargetType.REVISION, b"evaluation")] + ) + + +def test_check_snapshot_failures(swh_storage): + """Failure scenarios: + 0. snapshot parameter is not a snapshot + 1. snapshot id is correct but branches mismatched + 2. snapshot id is not correct, it's not found in the storage + 3. snapshot reference an alias which does not exist + 4. snapshot is found in storage, targeted revision does not exist + 5. snapshot is found in storage, targeted release does not exist -def test_check_snapshot_failure(swh_storage): - """check_snapshot should raise if something goes wrong""" + The following are not dealt with yet: + 6. snapshot is found in storage, targeted directory does not exist + 7. snapshot is found in storage, targeted content does not exist + + """ snap_id_hex = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7" snapshot = Snapshot( id=hash_to_bytes(snap_id_hex), @@ -240,19 +390,68 @@ }, } - # id is correct, the branch is wrong, that should raise nonetheless + # 0. not a Snapshot object, raise! + with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"): + check_snapshot(ORIGIN_VISIT, swh_storage) + + # 1. snapshot id is correct but branches mismatched for snap_id in [snap_id_hex, snapshot.id]: with pytest.raises(AssertionError, match="Differing attributes"): unexpected_snapshot["id"] = snap_id check_snapshot(unexpected_snapshot, swh_storage) - # snapshot id which does not exist + # 2. snapshot id is not correct, it's not found in the storage wrong_snap_id_hex = "999666f535f882bc7f9a18fb16c9ad27fda7bab7" for snap_id in [wrong_snap_id_hex, hash_to_bytes(wrong_snap_id_hex)]: unexpected_snapshot["id"] = wrong_snap_id_hex with pytest.raises(AssertionError, match="is not found"): check_snapshot(unexpected_snapshot, swh_storage) - # not a Snapshot object, raise! - with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"): - check_snapshot(ORIGIN_VISIT, swh_storage) + # 3. snapshot references an inexistent alias + snapshot0 = Snapshot( + id=hash_to_bytes("123666f535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + }, + ) + swh_storage.snapshot_add([snapshot0]) + + with pytest.raises(InconsistentAliasBranchError, match="Alias branch HEAD"): + check_snapshot(snapshot0, swh_storage) + + # 4. snapshot is found in storage, targeted revision does not exist + snapshot1 = Snapshot( + id=hash_to_bytes("456666f535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + b"HEAD": SnapshotBranch( + target=REVISION.id, target_type=TargetType.REVISION, + ), + }, + ) + + swh_storage.snapshot_add([snapshot1]) + + with pytest.raises(InexistentObjectsError, match="Branch/Revision"): + check_snapshot(snapshot1, swh_storage) + + swh_storage.revision_add([REVISION.to_dict()]) + snapshot2 = Snapshot( + id=hash_to_bytes("789666f535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + b"HEAD": SnapshotBranch( + target=REVISION.id, target_type=TargetType.REVISION, + ), + b"release/0.1.0": SnapshotBranch( + target=RELEASE.id, target_type=TargetType.RELEASE, + ), + }, + ) + + swh_storage.snapshot_add([snapshot2]) + + with pytest.raises(InexistentObjectsError, match="Branch/Release"): + check_snapshot(snapshot2, swh_storage) + + swh_storage.release_add([RELEASE.to_dict()])