Page MenuHomeSoftware Heritage

D3478.diff
No OneTemporary

D3478.diff

diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -10,11 +10,12 @@
import pytest
from json.decoder import JSONDecodeError
-from typing import Dict, Optional, Tuple
+from swh.storage.interface import StorageInterface
+from typing import Any, Dict, Iterable, Optional, Tuple, Union
from unittest.mock import patch
-from swh.model.model import Snapshot
+from swh.model.model import Snapshot, TargetType
from swh.loader.package.archive.loader import ArchiveLoader
from swh.loader.package.nixguix.loader import (
NixGuixLoader,
@@ -29,13 +30,23 @@
from swh.loader.tests import (
assert_last_visit_matches,
get_stats,
- check_snapshot,
+ check_snapshot as check_snapshot_full,
)
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json"
+def check_snapshot(
+ snapshot: Union[Dict[str, Any], Snapshot],
+ storage: StorageInterface,
+ allowed_empty: Iterable[Tuple[TargetType, bytes]] = [
+ (TargetType.REVISION, b"evaluation")
+ ],
+):
+ return check_snapshot_full(snapshot, storage, allowed_empty)
+
+
def test_retrieve_sources(swh_config, requests_mock_datadir):
j = retrieve_sources(sources_url)
assert "sources" in j.keys()
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
--- a/swh/loader/tests/__init__.py
+++ b/swh/loader/tests/__init__.py
@@ -6,10 +6,11 @@
import os
import subprocess
+from collections import defaultdict
from pathlib import PosixPath
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
-from swh.model.model import OriginVisitStatus, Snapshot
+from swh.model.model import OriginVisitStatus, Snapshot, TargetType
from swh.model.hashutil import hash_to_bytes
from swh.storage.interface import StorageInterface
@@ -100,17 +101,34 @@
return {"target": encoded_target, "target_type": target_type}
+class InconsistentAliasBranchError(AssertionError):
+ """When an alias branch targets an inexistent branch."""
+
+ pass
+
+
+class InexistentObjectsError(AssertionError):
+ """When a targeted branch reference does not exist in the storage"""
+
+ pass
+
+
def check_snapshot(
- snapshot: Union[Dict[str, Any], Snapshot], storage: StorageInterface
+ snapshot: Union[Dict[str, Any], Snapshot],
+ storage: StorageInterface,
+ allowed_empty: Iterable[Tuple[TargetType, bytes]] = [],
):
- """Check for snapshot match.
-
- The hashes can be both in hex or bytes, the necessary conversion will happen prior
- to check.
+ """Check that:
+ - snapshot exists in the storage and match
+ - each object reference up to the revision/release targets exists
Args:
snapshot: full snapshot to check for existence and consistency
storage: storage to lookup information into
+ allowed_empty: Iterable of branch we allow to be empty (some edge case loaders
+ allows this case to happen, nixguix for example allows the branch evaluation"
+ to target the nixpkgs git commit reference, which may not yet be resolvable at
+ loading time)
Returns:
the snapshot stored in the storage for further test assertion if any is
@@ -132,17 +150,60 @@
else:
raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}")
- snap = storage.snapshot_get(expected_snapshot.id)
- if snap is None:
+ snapshot_dict = storage.snapshot_get(expected_snapshot.id)
+ if snapshot_dict is None:
raise AssertionError(f"Snapshot {expected_snapshot.id.hex()} is not found")
- assert snap["next_branch"] is None # we don't deal with large snapshot in tests
- snap.pop("next_branch")
- actual_snap = Snapshot.from_dict(snap)
-
- assert expected_snapshot == actual_snap
-
- return snap # for retro compat, returned the dict, remove when clients are migrated
+ snapshot_dict.pop("next_branch")
+ actual_snaphot = Snapshot.from_dict(snapshot_dict)
+ assert isinstance(actual_snaphot, Snapshot)
+
+ assert expected_snapshot == actual_snaphot
+
+ branches_by_target_type = defaultdict(list)
+ object_to_branch = {}
+ for branch, target in actual_snaphot.branches.items():
+ if (target.target_type, branch) in allowed_empty:
+ # safe for those elements to not be checked for existence
+ continue
+ branches_by_target_type[target.target_type].append(target.target)
+ object_to_branch[target.target] = branch
+
+ # check that alias references target something that exists, otherwise raise
+ aliases: List[bytes] = branches_by_target_type.get(TargetType.ALIAS, [])
+ for alias in aliases:
+ if alias not in actual_snaphot.branches:
+ raise InconsistentAliasBranchError(
+ f"Alias branch {alias.decode('utf-8')} "
+ f"should be in {list(actual_snaphot.branches)}"
+ )
+
+ revs = branches_by_target_type.get(TargetType.REVISION)
+ if revs:
+ revisions = list(storage.revision_get(revs))
+ not_found = [rev_id for rev_id, rev in zip(revs, revisions) if rev is None]
+ if not_found:
+ missing_objs = ", ".join(
+ str((object_to_branch[rev], rev.hex())) for rev in not_found
+ )
+ raise InexistentObjectsError(
+ f"Branch/Revision(s) {missing_objs} should exist in storage"
+ )
+
+ rels = branches_by_target_type.get(TargetType.RELEASE)
+ if rels:
+ releases = list(storage.release_get(rels))
+ not_found = [rel_id for rel_id, rel in zip(rels, releases) if rel is None]
+ if not_found:
+ missing_objs = ", ".join(
+ str((object_to_branch[rel], rel.hex())) for rel in not_found
+ )
+ raise InexistentObjectsError(
+ f"Branch/Release(s) {missing_objs} should exist in storage"
+ )
+
+ # for retro compat, returned the dict, remove when clients are migrated
+ return snapshot_dict
def get_stats(storage) -> Dict:
diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py
--- a/swh/loader/tests/test_init.py
+++ b/swh/loader/tests/test_init.py
@@ -9,12 +9,23 @@
import os
import subprocess
+from swh.model.from_disk import DentryPerms
from swh.model.model import (
+ Content,
+ Directory,
+ DirectoryEntry,
+ ObjectType,
OriginVisit,
OriginVisitStatus,
+ Person,
+ Release,
+ Revision,
+ RevisionType,
Snapshot,
SnapshotBranch,
TargetType,
+ Timestamp,
+ TimestampWithTimezone,
)
from swh.model.hashutil import hash_to_bytes
@@ -23,6 +34,8 @@
encode_target,
check_snapshot,
prepare_repository_from_archive,
+ InconsistentAliasBranchError,
+ InexistentObjectsError,
)
@@ -47,6 +60,109 @@
)
+CONTENT = Content(
+ data=b"42\n",
+ length=3,
+ sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
+ sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
+ sha256=hash_to_bytes(
+ "673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a"
+ ),
+ blake2s256=hash_to_bytes(
+ "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d"
+ ),
+ status="visible",
+)
+
+
+DIRECTORY = Directory(
+ id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"),
+ entries=tuple(
+ [
+ DirectoryEntry(
+ name=b"foo",
+ type="file",
+ target=CONTENT.sha1_git,
+ perms=DentryPerms.content,
+ )
+ ]
+ ),
+)
+
+
+REVISION = Revision(
+ id=hash_to_bytes("066b1b62dbfa033362092af468bf6cfabec230e7"),
+ message=b"hello",
+ author=Person(
+ name=b"Nicolas Dandrimont",
+ email=b"nicolas@example.com",
+ fullname=b"Nicolas Dandrimont <nicolas@example.com> ",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1234567890, microseconds=0),
+ offset=120,
+ negative_utc=False,
+ ),
+ committer=Person(
+ name=b"St\xc3fano Zacchiroli",
+ email=b"stefano@example.com",
+ fullname=b"St\xc3fano Zacchiroli <stefano@example.com>",
+ ),
+ committer_date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1123456789, microseconds=0),
+ offset=0,
+ negative_utc=True,
+ ),
+ parents=(),
+ type=RevisionType.GIT,
+ directory=DIRECTORY.id,
+ metadata={
+ "checksums": {"sha1": "tarball-sha1", "sha256": "tarball-sha256",},
+ "signed-off-by": "some-dude",
+ },
+ extra_headers=(
+ (b"gpgsig", b"test123"),
+ (b"mergetag", b"foo\\bar"),
+ (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"),
+ ),
+ synthetic=True,
+)
+
+
+RELEASE = Release(
+ id=hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"),
+ name=b"v0.0.2",
+ author=Person(
+ name=b"tony", email=b"tony@ardumont.fr", fullname=b"tony <tony@ardumont.fr>",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1634336813, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ target=REVISION.id,
+ target_type=ObjectType.REVISION,
+ message=b"yet another synthetic release",
+ synthetic=True,
+)
+
+
+SNAPSHOT = Snapshot(
+ id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"release/0.1.0": SnapshotBranch(
+ target=RELEASE.id, target_type=TargetType.RELEASE,
+ ),
+ b"HEAD": SnapshotBranch(target=REVISION.id, target_type=TargetType.REVISION,),
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ b"evaluation": SnapshotBranch( # branch dedicated to not exist in storage
+ target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"),
+ target_type=TargetType.REVISION,
+ ),
+ },
+)
+
+
@pytest.fixture
def mock_storage(mocker):
mock_storage = mocker.patch("swh.loader.tests.origin_get_latest_visit_status")
@@ -197,27 +313,61 @@
def test_check_snapshot(swh_storage):
- """Check snapshot should not raise when everything is fine"""
- snapshot = Snapshot(
- id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"),
- branches={
- b"master": SnapshotBranch(
- target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION,
- ),
- },
- )
+ """Everything should be fine when snapshot is found and the snapshot reference up to the
+ revision exist in the storage.
- s = swh_storage.snapshot_add([snapshot])
+ """
+ # Create a consistent snapshot arborescence tree in storage
+ found = False
+ for entry in DIRECTORY.entries:
+ if entry.target == CONTENT.sha1_git:
+ found = True
+ break
+ assert found is True
+
+ assert REVISION.directory == DIRECTORY.id
+ assert RELEASE.target == REVISION.id
+
+ for branch, target in SNAPSHOT.branches.items():
+ if branch == b"alias":
+ assert target.target in SNAPSHOT.branches
+ elif branch == b"evaluation":
+ # this one does not exist and we are safelisting its check below
+ continue
+ else:
+ assert target.target in [REVISION.id, RELEASE.id]
+
+ swh_storage.content_add([CONTENT.to_dict()])
+ swh_storage.directory_add([DIRECTORY.to_dict()])
+ swh_storage.revision_add([REVISION.to_dict()])
+ swh_storage.release_add([RELEASE.to_dict()])
+ s = swh_storage.snapshot_add([SNAPSHOT.to_dict()])
assert s == {
"snapshot:add": 1,
}
- for snap in [snapshot, snapshot.to_dict()]:
- check_snapshot(snap, swh_storage)
+ for snap in [SNAPSHOT, SNAPSHOT.to_dict()]:
+ # all should be fine!
+ check_snapshot(
+ snap, swh_storage, allowed_empty=[(TargetType.REVISION, b"evaluation")]
+ )
+
+
+def test_check_snapshot_failures(swh_storage):
+ """Failure scenarios:
+ 0. snapshot parameter is not a snapshot
+ 1. snapshot id is correct but branches mismatched
+ 2. snapshot id is not correct, it's not found in the storage
+ 3. snapshot reference an alias which does not exist
+ 4. snapshot is found in storage, targeted revision does not exist
+ 5. snapshot is found in storage, targeted release does not exist
-def test_check_snapshot_failure(swh_storage):
- """check_snapshot should raise if something goes wrong"""
+ The following are not dealt with yet:
+ 6. snapshot is found in storage, targeted directory does not exist
+ 7. snapshot is found in storage, targeted content does not exist
+
+ """
snap_id_hex = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"
snapshot = Snapshot(
id=hash_to_bytes(snap_id_hex),
@@ -240,19 +390,66 @@
},
}
- # id is correct, the branch is wrong, that should raise nonetheless
+ # 0. not a Snapshot object, raise!
+ with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"):
+ check_snapshot(ORIGIN_VISIT, swh_storage)
+
+ # 1. snapshot id is correct but branches mismatched
for snap_id in [snap_id_hex, snapshot.id]:
with pytest.raises(AssertionError, match="Differing attributes"):
unexpected_snapshot["id"] = snap_id
check_snapshot(unexpected_snapshot, swh_storage)
- # snapshot id which does not exist
+ # 2. snapshot id is not correct, it's not found in the storage
wrong_snap_id_hex = "999666f535f882bc7f9a18fb16c9ad27fda7bab7"
for snap_id in [wrong_snap_id_hex, hash_to_bytes(wrong_snap_id_hex)]:
unexpected_snapshot["id"] = wrong_snap_id_hex
with pytest.raises(AssertionError, match="is not found"):
check_snapshot(unexpected_snapshot, swh_storage)
- # not a Snapshot object, raise!
- with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"):
- check_snapshot(ORIGIN_VISIT, swh_storage)
+ # 3. snapshot references an inexistent alias
+ snapshot0 = Snapshot(
+ id=hash_to_bytes("123666f535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ },
+ )
+ swh_storage.snapshot_add([snapshot0])
+
+ with pytest.raises(InconsistentAliasBranchError, match="Alias branch HEAD"):
+ check_snapshot(snapshot0, swh_storage)
+
+ # 4. snapshot is found in storage, targeted revision does not exist
+ snapshot1 = Snapshot(
+ id=hash_to_bytes("456666f535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ b"HEAD": SnapshotBranch(
+ target=REVISION.id, target_type=TargetType.REVISION,
+ ),
+ },
+ )
+
+ swh_storage.snapshot_add([snapshot1])
+
+ with pytest.raises(InexistentObjectsError, match="Branch/Revision"):
+ check_snapshot(snapshot1, swh_storage)
+
+ swh_storage.revision_add([REVISION.to_dict()])
+ snapshot2 = Snapshot(
+ id=hash_to_bytes("789666f535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ b"HEAD": SnapshotBranch(
+ target=REVISION.id, target_type=TargetType.REVISION,
+ ),
+ b"release/0.1.0": SnapshotBranch(
+ target=RELEASE.id, target_type=TargetType.RELEASE,
+ ),
+ },
+ )
+
+ swh_storage.snapshot_add([snapshot2])
+
+ with pytest.raises(InexistentObjectsError, match="Branch/Release"):
+ check_snapshot(snapshot2, swh_storage)

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 11:24 AM (1 w, 22 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217609

Event Timeline