def snapshot_resolve_alias(
    storage: StorageInterface, snapshot_id: Sha1Git, alias_name: bytes
) -> Optional[Tuple[List[SnapshotBranch], Optional[SnapshotBranch]]]:
    """
    Follow a snapshot branch alias, hop by hop, until a non-alias branch is reached.

    Each hop queries ``storage.snapshot_get_branches`` for a single branch
    (``branches_count=1``) starting at the name being looked up.

    Args:
        storage: Storage instance
        snapshot_id: identifier of the snapshot holding the branches
        alias_name: name of the branch alias to start resolving from

    Returns:
        ``None`` when the storage returns ``None`` for ``snapshot_id`` on the
        first lookup (e.g. unknown snapshot).

        ``([], None)`` when ``alias_name`` is not among the returned branches.

        Otherwise a tuple ``(followed, resolved)`` where ``followed`` lists, in
        traversal order, every branch visited before the last one, and
        ``resolved`` is the last branch reached:

        - a non-alias branch when the resolution succeeded;
        - ``None`` when a dangling branch was encountered;
        - an alias branch when the walk had to stop early, either because its
          target was already visited (cycle) or because its target name is
          absent from the branches returned by the storage.
    """

    def fetch_branches(name: bytes):
        # One-branch window of the snapshot, starting at ``name``.
        return storage.snapshot_get_branches(
            snapshot_id, branches_from=name, branches_count=1
        )

    partial = fetch_branches(alias_name)
    if partial is None:
        return None

    if alias_name not in partial["branches"]:
        return ([], None)

    current = partial["branches"][alias_name]
    chain = [current]
    visited = {alias_name}

    while True:
        # Dangling branch or concrete target: nothing left to follow.
        if current is None or current.target_type != TargetType.ALIAS:
            break

        next_name = current.target
        if next_name in visited:
            # Cycle: leave the offending alias as the last chain element.
            break

        partial = fetch_branches(next_name)
        # The snapshot was found by the first lookup, so it must still be there.
        assert partial is not None

        found = partial["branches"]
        if next_name not in found:
            # Alias points to a name the snapshot does not contain.
            break

        visited.add(next_name)
        current = found[next_name]
        chain.append(current)

    # Only the final element can be None, hence the cast on the prefix.
    *followed, resolved = chain
    return (cast(List[SnapshotBranch], followed), resolved)
def test_snapshot_resolve_aliases_unknown_snapshot(swh_storage):
    """Resolving against a snapshot id that was never added yields ``None``."""
    resolved = snapshot_resolve_alias(swh_storage, b"foo", b"HEAD")
    assert resolved is None


def test_snapshot_resolve_aliases_no_aliases(swh_storage):
    """A snapshot without branches has nothing to follow: ``([], None)``."""
    empty_snapshot = Snapshot(branches={})
    swh_storage.snapshot_add([empty_snapshot])

    resolved = snapshot_resolve_alias(swh_storage, empty_snapshot.id, b"HEAD")
    assert resolved == ([], None)


def test_snapshot_resolve_alias(swh_storage, sample_data):
    """Alias chains of one to three hops end on the revision/release branch."""

    def alias_to(name):
        return SnapshotBranch(target=name, target_type=TargetType.ALIAS)

    # Concrete (non-alias) branches the aliases ultimately point to.
    revision_target = SnapshotBranch(
        target=sample_data.revisions[0].id, target_type=TargetType.REVISION,
    )
    release_target = SnapshotBranch(
        target=sample_data.releases[0].id, target_type=TargetType.RELEASE,
    )

    # rev_alias3 -> rev_alias2 -> rev_alias1 -> revision_branch
    first_alias = alias_to(b"revision_branch")
    second_alias = alias_to(b"rev_alias1")
    third_alias = alias_to(b"rev_alias2")
    # rel_alias -> release_branch
    release_alias = alias_to(b"release_branch")

    snapshot = Snapshot(
        branches={
            b"revision_branch": revision_target,
            b"release_branch": release_target,
            b"rev_alias1": first_alias,
            b"rev_alias2": second_alias,
            b"rev_alias3": third_alias,
            b"rel_alias": release_alias,
        }
    )
    swh_storage.snapshot_add([snapshot])

    # Starting alias -> (aliases followed in order, final real branch).
    expectations = {
        b"rev_alias1": ([first_alias], revision_target),
        b"rev_alias2": ([second_alias, first_alias], revision_target),
        b"rev_alias3": ([third_alias, second_alias, first_alias], revision_target),
        b"rel_alias": ([release_alias], release_target),
    }
    for start_name, expected in expectations.items():
        resolved = snapshot_resolve_alias(swh_storage, snapshot.id, start_name)
        assert resolved == expected


def test_snapshot_resolve_alias_dangling_branch(swh_storage):
    """An alias to a dangling (``None``) branch resolves to ``None``."""
    target_name = b"dangling_branch"
    start_name = b"rev_alias"
    alias_branch = SnapshotBranch(target=target_name, target_type=TargetType.ALIAS)

    snapshot = Snapshot(branches={target_name: None, start_name: alias_branch})
    swh_storage.snapshot_add([snapshot])

    resolved = snapshot_resolve_alias(swh_storage, snapshot.id, start_name)
    assert resolved == ([alias_branch], None)


def test_snapshot_resolve_alias_cycle_found(swh_storage):
    """On a cycle, the second tuple member is the alias closing the loop."""
    # alias_1 -> alias_2 -> alias_3 -> alias_4 -> alias_2 (cycle)
    hops = (
        (b"alias_1", b"alias_2"),
        (b"alias_2", b"alias_3"),
        (b"alias_3", b"alias_4"),
        (b"alias_4", b"alias_2"),
    )
    cycle = {
        name: SnapshotBranch(target=target, target_type=TargetType.ALIAS)
        for name, target in hops
    }

    snapshot = Snapshot(branches=cycle)
    swh_storage.snapshot_add([snapshot])

    resolved = snapshot_resolve_alias(swh_storage, snapshot.id, b"alias_1")
    assert resolved == (
        [cycle[b"alias_1"], cycle[b"alias_2"], cycle[b"alias_3"]],
        cycle[b"alias_4"],
    )