Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9313115
D3478.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
15 KB
Subscribers
None
D3478.diff
View Options
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -10,11 +10,12 @@
import pytest
from json.decoder import JSONDecodeError
-from typing import Dict, Optional, Tuple
+from swh.storage.interface import StorageInterface
+from typing import Any, Dict, Iterable, Optional, Tuple, Union
from unittest.mock import patch
-from swh.model.model import Snapshot
+from swh.model.model import Snapshot, TargetType
from swh.loader.package.archive.loader import ArchiveLoader
from swh.loader.package.nixguix.loader import (
NixGuixLoader,
@@ -29,13 +30,23 @@
from swh.loader.tests import (
assert_last_visit_matches,
get_stats,
- check_snapshot,
+ check_snapshot as check_snapshot_full,
)
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources.json"
+def check_snapshot(
+ snapshot: Union[Dict[str, Any], Snapshot],
+ storage: StorageInterface,
+ allowed_empty: Iterable[Tuple[TargetType, bytes]] = [
+ (TargetType.REVISION, b"evaluation")
+ ],
+):
+ return check_snapshot_full(snapshot, storage, allowed_empty)
+
+
def test_retrieve_sources(swh_config, requests_mock_datadir):
j = retrieve_sources(sources_url)
assert "sources" in j.keys()
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
--- a/swh/loader/tests/__init__.py
+++ b/swh/loader/tests/__init__.py
@@ -6,10 +6,11 @@
import os
import subprocess
+from collections import defaultdict
from pathlib import PosixPath
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
-from swh.model.model import OriginVisitStatus, Snapshot
+from swh.model.model import OriginVisitStatus, Snapshot, TargetType
from swh.model.hashutil import hash_to_bytes
from swh.storage.interface import StorageInterface
@@ -100,17 +101,34 @@
return {"target": encoded_target, "target_type": target_type}
+class InconsistentAliasBranchError(AssertionError):
+ """When an alias branch targets an inexistent branch."""
+
+ pass
+
+
+class InexistentObjectsError(AssertionError):
+ """When a targeted branch reference does not exist in the storage"""
+
+ pass
+
+
def check_snapshot(
- snapshot: Union[Dict[str, Any], Snapshot], storage: StorageInterface
+ snapshot: Union[Dict[str, Any], Snapshot],
+ storage: StorageInterface,
+ allowed_empty: Iterable[Tuple[TargetType, bytes]] = [],
):
- """Check for snapshot match.
-
- The hashes can be both in hex or bytes, the necessary conversion will happen prior
- to check.
+ """Check that:
+ - snapshot exists in the storage and match
+ - each object reference up to the revision/release targets exists
Args:
snapshot: full snapshot to check for existence and consistency
storage: storage to lookup information into
+ allowed_empty: Iterable of branch we allow to be empty (some edge case loaders
+ allows this case to happen, nixguix for example allows the branch evaluation"
+ to target the nixpkgs git commit reference, which may not yet be resolvable at
+ loading time)
Returns:
the snapshot stored in the storage for further test assertion if any is
@@ -132,17 +150,60 @@
else:
raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}")
- snap = storage.snapshot_get(expected_snapshot.id)
- if snap is None:
+ snapshot_dict = storage.snapshot_get(expected_snapshot.id)
+ if snapshot_dict is None:
raise AssertionError(f"Snapshot {expected_snapshot.id.hex()} is not found")
- assert snap["next_branch"] is None # we don't deal with large snapshot in tests
- snap.pop("next_branch")
- actual_snap = Snapshot.from_dict(snap)
-
- assert expected_snapshot == actual_snap
-
- return snap # for retro compat, returned the dict, remove when clients are migrated
+ snapshot_dict.pop("next_branch")
+ actual_snaphot = Snapshot.from_dict(snapshot_dict)
+ assert isinstance(actual_snaphot, Snapshot)
+
+ assert expected_snapshot == actual_snaphot
+
+ branches_by_target_type = defaultdict(list)
+ object_to_branch = {}
+ for branch, target in actual_snaphot.branches.items():
+ if (target.target_type, branch) in allowed_empty:
+ # safe for those elements to not be checked for existence
+ continue
+ branches_by_target_type[target.target_type].append(target.target)
+ object_to_branch[target.target] = branch
+
+ # check that alias references target something that exists, otherwise raise
+ aliases: List[bytes] = branches_by_target_type.get(TargetType.ALIAS, [])
+ for alias in aliases:
+ if alias not in actual_snaphot.branches:
+ raise InconsistentAliasBranchError(
+ f"Alias branch {alias.decode('utf-8')} "
+ f"should be in {list(actual_snaphot.branches)}"
+ )
+
+ revs = branches_by_target_type.get(TargetType.REVISION)
+ if revs:
+ revisions = list(storage.revision_get(revs))
+ not_found = [rev_id for rev_id, rev in zip(revs, revisions) if rev is None]
+ if not_found:
+ missing_objs = ", ".join(
+ str((object_to_branch[rev], rev.hex())) for rev in not_found
+ )
+ raise InexistentObjectsError(
+ f"Branch/Revision(s) {missing_objs} should exist in storage"
+ )
+
+ rels = branches_by_target_type.get(TargetType.RELEASE)
+ if rels:
+ releases = list(storage.release_get(rels))
+ not_found = [rel_id for rel_id, rel in zip(rels, releases) if rel is None]
+ if not_found:
+ missing_objs = ", ".join(
+ str((object_to_branch[rel], rel.hex())) for rel in not_found
+ )
+ raise InexistentObjectsError(
+ f"Branch/Release(s) {missing_objs} should exist in storage"
+ )
+
+ # for retro compat, returned the dict, remove when clients are migrated
+ return snapshot_dict
def get_stats(storage) -> Dict:
diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py
--- a/swh/loader/tests/test_init.py
+++ b/swh/loader/tests/test_init.py
@@ -9,12 +9,23 @@
import os
import subprocess
+from swh.model.from_disk import DentryPerms
from swh.model.model import (
+ Content,
+ Directory,
+ DirectoryEntry,
+ ObjectType,
OriginVisit,
OriginVisitStatus,
+ Person,
+ Release,
+ Revision,
+ RevisionType,
Snapshot,
SnapshotBranch,
TargetType,
+ Timestamp,
+ TimestampWithTimezone,
)
from swh.model.hashutil import hash_to_bytes
@@ -23,6 +34,8 @@
encode_target,
check_snapshot,
prepare_repository_from_archive,
+ InconsistentAliasBranchError,
+ InexistentObjectsError,
)
@@ -47,6 +60,109 @@
)
+CONTENT = Content(
+ data=b"42\n",
+ length=3,
+ sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
+ sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
+ sha256=hash_to_bytes(
+ "673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a"
+ ),
+ blake2s256=hash_to_bytes(
+ "d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d"
+ ),
+ status="visible",
+)
+
+
+DIRECTORY = Directory(
+ id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"),
+ entries=tuple(
+ [
+ DirectoryEntry(
+ name=b"foo",
+ type="file",
+ target=CONTENT.sha1_git,
+ perms=DentryPerms.content,
+ )
+ ]
+ ),
+)
+
+
+REVISION = Revision(
+ id=hash_to_bytes("066b1b62dbfa033362092af468bf6cfabec230e7"),
+ message=b"hello",
+ author=Person(
+ name=b"Nicolas Dandrimont",
+ email=b"nicolas@example.com",
+ fullname=b"Nicolas Dandrimont <nicolas@example.com> ",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1234567890, microseconds=0),
+ offset=120,
+ negative_utc=False,
+ ),
+ committer=Person(
+ name=b"St\xc3fano Zacchiroli",
+ email=b"stefano@example.com",
+ fullname=b"St\xc3fano Zacchiroli <stefano@example.com>",
+ ),
+ committer_date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1123456789, microseconds=0),
+ offset=0,
+ negative_utc=True,
+ ),
+ parents=(),
+ type=RevisionType.GIT,
+ directory=DIRECTORY.id,
+ metadata={
+ "checksums": {"sha1": "tarball-sha1", "sha256": "tarball-sha256",},
+ "signed-off-by": "some-dude",
+ },
+ extra_headers=(
+ (b"gpgsig", b"test123"),
+ (b"mergetag", b"foo\\bar"),
+ (b"mergetag", b"\x22\xaf\x89\x80\x01\x00"),
+ ),
+ synthetic=True,
+)
+
+
+RELEASE = Release(
+ id=hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"),
+ name=b"v0.0.2",
+ author=Person(
+ name=b"tony", email=b"tony@ardumont.fr", fullname=b"tony <tony@ardumont.fr>",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1634336813, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ target=REVISION.id,
+ target_type=ObjectType.REVISION,
+ message=b"yet another synthetic release",
+ synthetic=True,
+)
+
+
+SNAPSHOT = Snapshot(
+ id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"release/0.1.0": SnapshotBranch(
+ target=RELEASE.id, target_type=TargetType.RELEASE,
+ ),
+ b"HEAD": SnapshotBranch(target=REVISION.id, target_type=TargetType.REVISION,),
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ b"evaluation": SnapshotBranch( # branch dedicated to not exist in storage
+ target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"),
+ target_type=TargetType.REVISION,
+ ),
+ },
+)
+
+
@pytest.fixture
def mock_storage(mocker):
mock_storage = mocker.patch("swh.loader.tests.origin_get_latest_visit_status")
@@ -197,27 +313,61 @@
def test_check_snapshot(swh_storage):
- """Check snapshot should not raise when everything is fine"""
- snapshot = Snapshot(
- id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"),
- branches={
- b"master": SnapshotBranch(
- target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION,
- ),
- },
- )
+ """Everything should be fine when snapshot is found and the snapshot reference up to the
+ revision exist in the storage.
- s = swh_storage.snapshot_add([snapshot])
+ """
+ # Create a consistent snapshot arborescence tree in storage
+ found = False
+ for entry in DIRECTORY.entries:
+ if entry.target == CONTENT.sha1_git:
+ found = True
+ break
+ assert found is True
+
+ assert REVISION.directory == DIRECTORY.id
+ assert RELEASE.target == REVISION.id
+
+ for branch, target in SNAPSHOT.branches.items():
+ if branch == b"alias":
+ assert target.target in SNAPSHOT.branches
+ elif branch == b"evaluation":
+ # this one does not exist and we are safelisting its check below
+ continue
+ else:
+ assert target.target in [REVISION.id, RELEASE.id]
+
+ swh_storage.content_add([CONTENT.to_dict()])
+ swh_storage.directory_add([DIRECTORY.to_dict()])
+ swh_storage.revision_add([REVISION.to_dict()])
+ swh_storage.release_add([RELEASE.to_dict()])
+ s = swh_storage.snapshot_add([SNAPSHOT.to_dict()])
assert s == {
"snapshot:add": 1,
}
- for snap in [snapshot, snapshot.to_dict()]:
- check_snapshot(snap, swh_storage)
+ for snap in [SNAPSHOT, SNAPSHOT.to_dict()]:
+ # all should be fine!
+ check_snapshot(
+ snap, swh_storage, allowed_empty=[(TargetType.REVISION, b"evaluation")]
+ )
+
+
+def test_check_snapshot_failures(swh_storage):
+ """Failure scenarios:
+ 0. snapshot parameter is not a snapshot
+ 1. snapshot id is correct but branches mismatched
+ 2. snapshot id is not correct, it's not found in the storage
+ 3. snapshot reference an alias which does not exist
+ 4. snapshot is found in storage, targeted revision does not exist
+ 5. snapshot is found in storage, targeted release does not exist
-def test_check_snapshot_failure(swh_storage):
- """check_snapshot should raise if something goes wrong"""
+ The following are not dealt with yet:
+ 6. snapshot is found in storage, targeted directory does not exist
+ 7. snapshot is found in storage, targeted content does not exist
+
+ """
snap_id_hex = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"
snapshot = Snapshot(
id=hash_to_bytes(snap_id_hex),
@@ -240,19 +390,66 @@
},
}
- # id is correct, the branch is wrong, that should raise nonetheless
+ # 0. not a Snapshot object, raise!
+ with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"):
+ check_snapshot(ORIGIN_VISIT, swh_storage)
+
+ # 1. snapshot id is correct but branches mismatched
for snap_id in [snap_id_hex, snapshot.id]:
with pytest.raises(AssertionError, match="Differing attributes"):
unexpected_snapshot["id"] = snap_id
check_snapshot(unexpected_snapshot, swh_storage)
- # snapshot id which does not exist
+ # 2. snapshot id is not correct, it's not found in the storage
wrong_snap_id_hex = "999666f535f882bc7f9a18fb16c9ad27fda7bab7"
for snap_id in [wrong_snap_id_hex, hash_to_bytes(wrong_snap_id_hex)]:
unexpected_snapshot["id"] = wrong_snap_id_hex
with pytest.raises(AssertionError, match="is not found"):
check_snapshot(unexpected_snapshot, swh_storage)
- # not a Snapshot object, raise!
- with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"):
- check_snapshot(ORIGIN_VISIT, swh_storage)
+ # 3. snapshot references an inexistent alias
+ snapshot0 = Snapshot(
+ id=hash_to_bytes("123666f535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ },
+ )
+ swh_storage.snapshot_add([snapshot0])
+
+ with pytest.raises(InconsistentAliasBranchError, match="Alias branch HEAD"):
+ check_snapshot(snapshot0, swh_storage)
+
+ # 4. snapshot is found in storage, targeted revision does not exist
+ snapshot1 = Snapshot(
+ id=hash_to_bytes("456666f535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ b"HEAD": SnapshotBranch(
+ target=REVISION.id, target_type=TargetType.REVISION,
+ ),
+ },
+ )
+
+ swh_storage.snapshot_add([snapshot1])
+
+ with pytest.raises(InexistentObjectsError, match="Branch/Revision"):
+ check_snapshot(snapshot1, swh_storage)
+
+ swh_storage.revision_add([REVISION.to_dict()])
+ snapshot2 = Snapshot(
+ id=hash_to_bytes("789666f535f882bc7f9a18fb16c9ad27fda7bab7"),
+ branches={
+ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,),
+ b"HEAD": SnapshotBranch(
+ target=REVISION.id, target_type=TargetType.REVISION,
+ ),
+ b"release/0.1.0": SnapshotBranch(
+ target=RELEASE.id, target_type=TargetType.RELEASE,
+ ),
+ },
+ )
+
+ swh_storage.snapshot_add([snapshot2])
+
+ with pytest.raises(InexistentObjectsError, match="Branch/Release"):
+ check_snapshot(snapshot2, swh_storage)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 11:24 AM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217609
Attached To
D3478: loader.core: Make check_snapshot ensure snapshot object references exist
Event Timeline
Log In to Comment