# Reconstructed from a patch that touches two files; each section below is
# labelled with the path it belongs to.

from typing import Dict, List, Optional

from swh.model.collections import ImmutableDict
from swh.model.model import Sha1Git, Snapshot, SnapshotBranch, TargetType
from swh.storage.interface import PartialBranches, StorageInterface


# ---------------------------------------------------------------------------
# swh/storage/algos/snapshot.py
# ---------------------------------------------------------------------------


def snapshot_resolve_aliases(
    storage: StorageInterface, snapshot_id: Sha1Git, nb_aliases: Optional[int] = None
) -> Optional[PartialBranches]:
    """
    Resolve snapshot branch aliases (possibly chained) to their real targets.

    Args:
        storage: Storage instance
        snapshot_id: snapshot identifier
        nb_aliases: optional number of aliases after which the resolution
            stops; when omitted, the alias count reported by
            ``storage.snapshot_count_branches`` is used. Aliases are resolved
            one chain at a time, so slightly more than ``nb_aliases`` entries
            can be returned when the limit is reached in the middle of a chain.

    Returns:
        ``None`` if the storage does not know the snapshot, otherwise a dict
        with the following keys:

        * **id**: identifier of the snapshot
        * **branches**: a dict of resolved aliased branches contained in the
          snapshot whose keys are the alias names and values the real targets
          (``None`` when the branch an alias points to cannot be found).
        * **next_branch**: always ``None``
    """
    aliases: Dict[bytes, Optional[SnapshotBranch]] = {}

    if nb_aliases is None:
        snapshot_sizes = storage.snapshot_count_branches(snapshot_id)
        if snapshot_sizes is None:
            return None
        nb_aliases = snapshot_sizes.get("alias", 0)
    # only here to narrow Optional[int] to int for the type checker
    assert nb_aliases is not None

    if nb_aliases <= 0:
        # nothing to resolve, no need to fetch any branch
        return PartialBranches(id=snapshot_id, branches=aliases, next_branch=None)

    snapshot = storage.snapshot_get_branches(snapshot_id)
    if snapshot is None:
        return None

    processed_aliases = set()
    # iterate on the snapshot branches, one page per storage call, and stop
    # as soon as enough aliases have been processed
    while snapshot is not None:
        page_branches = snapshot["branches"]
        for branch_name, branch_info in page_branches.items():
            # names of the aliases crossed while following the current chain
            alias_chain: List[bytes] = []
            while (
                branch_info is not None
                and branch_info.target_type == TargetType.ALIAS
            ):
                alias_chain.append(branch_name)
                processed_aliases.add(branch_name)
                branch_name = branch_info.target

                if branch_name in aliases:
                    # target already resolved earlier: reuse its result
                    resolved = aliases[branch_name]
                    for alias_name in alias_chain:
                        aliases[alias_name] = resolved
                    break

                if branch_name in page_branches:
                    # target is in the current fetched branches
                    branch_info = page_branches[branch_name]
                else:
                    # target branch needs to be fetched
                    fetched = storage.snapshot_get_branches(
                        snapshot_id, branches_from=branch_name, branches_count=1,
                    )
                    assert fetched is not None
                    # None when the alias points to a branch that is missing
                    branch_info = fetched["branches"].get(branch_name)

                # Record the partial resolution after every hop: besides
                # keeping the result up to date, this is what makes a cyclic
                # chain terminate, since the cycle eventually re-enters a
                # name already present in ``aliases`` and the walk stops.
                for alias_name in alias_chain:
                    aliases[alias_name] = branch_info

            # ``>=`` and not ``==``: following a chain can register several
            # aliases in one go, so the counter may step over ``nb_aliases``
            if len(processed_aliases) >= nb_aliases:
                break

        if len(processed_aliases) >= nb_aliases:
            break

        next_branch = snapshot["next_branch"]
        if next_branch is None:
            snapshot = None
        else:
            # fetch next branches
            snapshot = storage.snapshot_get_branches(
                snapshot_id, branches_from=next_branch,
            )

    return PartialBranches(id=snapshot_id, branches=aliases, next_branch=None)


# ---------------------------------------------------------------------------
# swh/storage/tests/algos/test_snapshot.py
# ---------------------------------------------------------------------------


def test_snapshot_resolve_aliases_unknown_snapshot(swh_storage):
    """Resolving aliases of a snapshot absent from the storage gives None."""
    assert snapshot_resolve_aliases(swh_storage, b"foo") is None


def test_snapshot_resolve_aliases_no_aliases(swh_storage):
    """A snapshot without any branch resolves to an empty branches dict."""
    snapshot = Snapshot(branches=ImmutableDict())
    swh_storage.snapshot_add([snapshot])

    assert snapshot_resolve_aliases(swh_storage, snapshot.id) == PartialBranches(
        id=snapshot.id, branches={}, next_branch=None,
    )


def test_snapshot_resolve_aliases(swh_storage, sample_data):
    """Direct and chained aliases all resolve to the final non-alias branch."""
    rev_branch_name = b"revision_branch"
    rel_branch_name = b"release_branch"
    rev_alias1_name = b"rev_alias1"
    rev_alias2_name = b"rev_alias2"
    rev_alias3_name = b"rev_alias3"
    rel_alias_name = b"rel_alias"
    rev_branch_info = SnapshotBranch(
        target=sample_data.revisions[0].id, target_type=TargetType.REVISION,
    )
    rel_branch_info = SnapshotBranch(
        target=sample_data.releases[0].id, target_type=TargetType.RELEASE,
    )

    snapshot = Snapshot(
        branches=ImmutableDict(
            (
                (rev_branch_name, rev_branch_info),
                (rel_branch_name, rel_branch_info),
                (
                    rev_alias1_name,
                    SnapshotBranch(
                        target=rev_branch_name, target_type=TargetType.ALIAS
                    ),
                ),
                # rev_alias3 -> rev_alias2 -> rev_alias1 -> revision_branch
                (
                    rev_alias2_name,
                    SnapshotBranch(
                        target=rev_alias1_name, target_type=TargetType.ALIAS
                    ),
                ),
                (
                    rev_alias3_name,
                    SnapshotBranch(
                        target=rev_alias2_name, target_type=TargetType.ALIAS
                    ),
                ),
                (
                    rel_alias_name,
                    SnapshotBranch(
                        target=rel_branch_name, target_type=TargetType.ALIAS
                    ),
                ),
            )
        )
    )
    swh_storage.snapshot_add([snapshot])

    expected_resolved_aliases = PartialBranches(
        id=snapshot.id,
        branches={
            rel_alias_name: rel_branch_info,
            rev_alias1_name: rev_branch_info,
            rev_alias2_name: rev_branch_info,
            rev_alias3_name: rev_branch_info,
        },
        next_branch=None,
    )

    resolved_aliases = snapshot_resolve_aliases(swh_storage, snapshot.id)
    assert resolved_aliases == expected_resolved_aliases