diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py --- a/swh/graphql/resolvers/content.py +++ b/swh/graphql/resolvers/content.py @@ -14,7 +14,7 @@ from .directory_entry import BaseDirectoryEntryNode from .release import BaseReleaseNode from .search import SearchResultNode -from .snapshot_branch import BaseSnapshotBranchNode +from .snapshot_branch import SnapshotBranchNode def read_and_validate_content_hashes(hashes) -> Dict[str, bytes]: @@ -98,7 +98,7 @@ SearchResultNode, BaseDirectoryEntryNode, BaseReleaseNode, - BaseSnapshotBranchNode, + SnapshotBranchNode, ] def _get_node_data(self) -> Optional[Content]: diff --git a/swh/graphql/resolvers/directory.py b/swh/graphql/resolvers/directory.py --- a/swh/graphql/resolvers/directory.py +++ b/swh/graphql/resolvers/directory.py @@ -12,7 +12,7 @@ from .release import BaseReleaseNode from .revision import BaseRevisionNode from .search import SearchResultNode -from .snapshot_branch import BaseSnapshotBranchNode +from .snapshot_branch import SnapshotBranchNode class BaseDirectoryNode(BaseSWHNode): @@ -69,7 +69,7 @@ _can_be_null = True obj: Union[ - BaseSnapshotBranchNode, + SnapshotBranchNode, BaseReleaseNode, BaseDirectoryEntryNode, SearchResultNode, diff --git a/swh/graphql/resolvers/release.py b/swh/graphql/resolvers/release.py --- a/swh/graphql/resolvers/release.py +++ b/swh/graphql/resolvers/release.py @@ -7,7 +7,7 @@ from .base_node import BaseSWHNode from .search import SearchResultNode -from .snapshot_branch import BaseSnapshotBranchNode +from .snapshot_branch import SnapshotBranchNode class BaseReleaseNode(BaseSWHNode): @@ -47,7 +47,7 @@ """ _can_be_null = True - obj: Union[BaseSnapshotBranchNode, BaseReleaseNode, SearchResultNode] + obj: Union[SnapshotBranchNode, BaseReleaseNode, SearchResultNode] def _get_node_data(self): # self.obj.target_hash is the requested release id diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py --- a/swh/graphql/resolvers/resolver_factory.py +++ b/swh/graphql/resolvers/resolver_factory.py @@ -33,7 +33,7 @@ TargetSnapshotNode, VisitSnapshotNode, ) -from .snapshot_branch import AliasSnapshotBranchNode, SnapshotBranchConnection +from .snapshot_branch import SnapshotBranchConnection from .visit import LatestVisitNode, OriginVisitConnection, OriginVisitNode from .visit_status import LatestVisitStatusNode, VisitStatusConnection @@ -46,7 +46,6 @@ "latest-status": LatestVisitStatusNode, "visit-snapshot": VisitSnapshotNode, "snapshot": SnapshotNode, - "branch-alias": AliasSnapshotBranchNode, "branch-revision": TargetRevisionNode, "branch-release": TargetReleaseNode, "branch-directory": TargetDirectoryNode, diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py --- a/swh/graphql/resolvers/resolvers.py +++ b/swh/graphql/resolvers/resolvers.py @@ -97,18 +97,17 @@ @snapshot_branch.field("target") def snapshot_branch_target_resolver( - obj: rs.snapshot_branch.BaseSnapshotBranchNode, info: GraphQLResolveInfo, **kw + obj: rs.snapshot_branch.SnapshotBranchNode, info: GraphQLResolveInfo, **kw ) -> Union[ rs.revision.BaseRevisionNode, rs.release.BaseReleaseNode, rs.directory.BaseDirectoryNode, rs.content.BaseContentNode, rs.snapshot.BaseSnapshotNode, - rs.snapshot_branch.BaseSnapshotBranchNode, ]: """ Snapshot branch target can be a revision, release, directory, - content, snapshot or a branch itself (alias type) + content or a snapshot """ return NodeObjectFactory.create(f"branch-{obj.targetType}", obj, info, **kw) @@ -329,7 +328,6 @@ rs.directory.BaseDirectoryNode, rs.content.BaseContentNode, rs.snapshot.BaseSnapshotNode, - rs.snapshot_branch.BaseSnapshotBranchNode, ], *_, ) -> str: diff --git a/swh/graphql/resolvers/revision.py b/swh/graphql/resolvers/revision.py --- a/swh/graphql/resolvers/revision.py +++ b/swh/graphql/resolvers/revision.py @@ -15,7 +15,7 @@ from .directory_entry import BaseDirectoryEntryNode from .release import BaseReleaseNode from .search import SearchResultNode -from .snapshot_branch import BaseSnapshotBranchNode +from .snapshot_branch import SnapshotBranchNode class BaseRevisionNode(BaseSWHNode): @@ -67,7 +67,7 @@ _can_be_null = True obj: Union[ - BaseSnapshotBranchNode, + SnapshotBranchNode, BaseReleaseNode, BaseDirectoryEntryNode, SearchResultNode, diff --git a/swh/graphql/resolvers/snapshot.py b/swh/graphql/resolvers/snapshot.py --- a/swh/graphql/resolvers/snapshot.py +++ b/swh/graphql/resolvers/snapshot.py @@ -71,10 +71,10 @@ Node resolver for a snapshot requested as a target """ - from .snapshot_branch import BaseSnapshotBranchNode + from .snapshot_branch import SnapshotBranchNode _can_be_null = True - obj: Union[SearchResultNode, BaseSnapshotBranchNode] + obj: Union[SearchResultNode, SnapshotBranchNode] def _get_node_data(self): snapshot_id = self.obj.target_hash diff --git a/swh/graphql/resolvers/snapshot_branch.py b/swh/graphql/resolvers/snapshot_branch.py --- a/swh/graphql/resolvers/snapshot_branch.py +++ b/swh/graphql/resolvers/snapshot_branch.py @@ -3,79 +3,96 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from collections import namedtuple +from typing import List, Optional, Tuple -from swh.graphql.errors import ObjectNotFoundError +from swh.graphql.errors import DataError from swh.graphql.utils import utils -from swh.storage.interface import PagedResult +from swh.model.model import CoreSWHID, SnapshotBranch, TargetType +from swh.storage.interface import PagedResult, PartialBranches from .base_connection import BaseConnection from .base_node import BaseNode -class BaseSnapshotBranchNode(BaseNode): +class SnapshotBranchNode(BaseNode): + + obj: "SnapshotBranchConnection" # target field for this node is a UNION type # It is resolved in the top level (resolvers.resolvers.py) - def _get_node_from_data(self, node_data: tuple): - # node_data is a tuple as returned by _get_paged_result in - # SnapshotBranchConnection and _get_node_data in AliasSnapshotBranchNode - # overriding to support this special data structure + MAX_ALIAS_REDIRECTS = 5 + + def _resolve_branch_with_target( + self, branch_obj: Optional[SnapshotBranch] + ) -> Tuple[Optional[SnapshotBranch], List[bytes]]: + """ + Follow the alias chain and return the first branch with a non alias target + """ + resolve_chain: List[ + bytes + ] = [] # List to keep track of all the alias branch names + while branch_obj and branch_obj.target_type == TargetType.ALIAS: + target_branch: bytes = branch_obj.target # target branch name + resolve_chain.append(target_branch) + snapshot_swhid: CoreSWHID = self._get_snapshot_swhid() + # query for the new branch + alias_branches: Optional[ + PartialBranches + ] = self.archive.get_snapshot_branches( + snapshot=snapshot_swhid.object_id, first=1, name_include=target_branch + ) + if ( + not alias_branches + # The target branch is missing + or alias_branches["branches"].get(target_branch) is None + # Circular reference + or resolve_chain.count(target_branch) > 1 + # Too many re-directs + or len(resolve_chain) > self.MAX_ALIAS_REDIRECTS + ): + # chain broke without a target, this will still return the resolve_chain list + branch_obj = None + break + branch_obj = alias_branches["branches"][target_branch] + return branch_obj, resolve_chain + + def _get_node_from_data(self, node_data: Tuple[bytes, Optional[SnapshotBranch]]): + # node_data is received as a tuple from the archive + # (unlike an object or a dict in other cases) + # overriding this method to make a dict from this special data structure branch_name, branch_obj = node_data - node = { + branch_obj, resolve_chain = self._resolve_branch_with_target(branch_obj) + updated_node_data = { + # Name of the branch is kept to the original(first) branch "name": branch_name, - "type": branch_obj.target_type.value, - "target_hash": branch_obj.target, + # This will be None for every target other than alias + "resolve_chain": resolve_chain if resolve_chain else None, + "target_type": branch_obj.target_type.value if branch_obj else None, + "target_hash": branch_obj.target if branch_obj else None, } - return namedtuple("NodeObj", node.keys())(*node.values()) - - def is_type_of(self): - return "Branch" + return super()._get_node_from_data(updated_node_data) @property def targetType(self): # To support the schema naming convention - return self._node.type + return self._node.target_type - def snapshot_swhid(self): - """ - Logic to handle multiple branch alias redirects - Alias redirects can be any level deep. Hence the parent snapshot can be - reached only by a loop + @property + def resolveChain(self): + return self._node.resolve_chain - This code expects every BranchNode to have a Snapshot parent in the GraphQL query - at some level. - """ + def _get_snapshot_swhid(self) -> CoreSWHID: from .snapshot import BaseSnapshotNode - parent = self.obj - while parent: - if isinstance( - parent, BaseSnapshotNode - ): # Reached the nearest SnapshotNode object - return parent.swhid - parent = parent.obj - # Reached the root query node. This will never happen with the current entrypoints - raise ObjectNotFoundError("There is no snapshot associated with the branch") - - -class AliasSnapshotBranchNode(BaseSnapshotBranchNode): - - obj: BaseSnapshotBranchNode - - def _get_node_data(self): - snapshot_swhid = self.snapshot_swhid() - target_branch = self.obj.target_hash - - alias_branch = self.archive.get_snapshot_branches( - snapshot_swhid.object_id, first=1, name_include=target_branch - ) - if target_branch not in alias_branch["branches"]: - raise ObjectNotFoundError( - f"Branch name with {target_branch.decode()} is not available" + # As of now parent of SnapshotBranch will always be a SnapshotBranchConnection + # and self.obj.obj will always be a BaseSnapshot object + # override this method for SnapshotBranch nodes reachable by some other path + if not isinstance(self.obj.obj, BaseSnapshotNode): + # This is not expected to happen with the existing endpoints + raise DataError( + "SnapshotBranch is accessed outside a SnapshotBranchConnection context" ) - # this will be serialized in _get_node_from_data method in the base class - return (target_branch, alias_branch["branches"][target_branch]) + return self.obj.obj.swhid class SnapshotBranchConnection(BaseConnection): @@ -87,28 +104,29 @@ obj: BaseSnapshotNode - _node_class = BaseSnapshotBranchNode + _node_class = SnapshotBranchNode def _get_paged_result(self) -> PagedResult: - result = self.archive.get_snapshot_branches( - self.obj.swhid.object_id, + branches: Optional[PartialBranches] = self.archive.get_snapshot_branches( + snapshot=self.obj.swhid.object_id, after=self._get_after_arg(), first=self._get_first_arg(), target_types=self.kwargs.get("types"), name_include=self._get_name_include_arg(), name_exclude_prefix=self._get_name_exclude_prefix_arg(), ) - # endCursor is the last branch name, logic for that - end_cusrsor = ( - result["next_branch"] if result["next_branch"] is not None else None - ) + end_cursor: Optional[bytes] = branches.get("next_branch") if branches else None # FIXME, this pagination is not consistent with other connections # FIX in swh-storage to return PagedResult # STORAGE-TODO - # this will be serialized in _get_node_from_data method in the node class + # each result item will be converted to a dict in _get_node_from_data + # method in the node class + results: List[Tuple[bytes, Optional[SnapshotBranch]]] = ( + list(branches["branches"].items()) if branches else [] + ) return PagedResult( - results=result["branches"].items(), next_page_token=end_cusrsor + results=results, next_page_token=end_cursor.decode() if end_cursor else None ) def _get_after_arg(self): @@ -124,6 +142,6 @@ name_exclude_prefix = self.kwargs.get("nameExcludePrefix", None) return name_exclude_prefix.encode() if name_exclude_prefix else None - def _get_index_cursor(self, index: int, node: BaseSnapshotBranchNode): + def _get_index_cursor(self, index: int, node: SnapshotBranchNode): # Snapshot branch is using a different cursor, hence the override return utils.get_encoded_cursor(node.name) diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql --- a/swh/graphql/schema/schema.graphql +++ b/swh/graphql/schema/schema.graphql @@ -496,7 +496,7 @@ """ Possible branch target objects """ -union BranchTarget = Revision | Release | Branch | Content | Directory | Snapshot +union BranchTarget = Revision | Release | Content | Directory | Snapshot """ Possible Branch target types @@ -524,6 +524,11 @@ """ targetType: BranchTargetType + """ + The chain of alias branches until the target + """ + resolveChain: [BinaryString] + """ Branch target object """ diff --git a/swh/graphql/tests/data.py b/swh/graphql/tests/data.py --- a/swh/graphql/tests/data.py +++ b/swh/graphql/tests/data.py @@ -13,6 +13,9 @@ Release, Revision, RevisionType, + Snapshot, + SnapshotBranch, + TargetType, ) from swh.model.tests import swh_model_data @@ -172,7 +175,75 @@ ] +def get_snapshots_with_multiple_alias(): + return [ + Snapshot( + # This branch alias chain breaks without a target + branches={ + b"target/alias1": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias2" + ), + b"target/alias2": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias3" + ), + }, + ), + Snapshot( + # This branch alias chain resolves to a release after 2 levels + branches={ + b"target/alias1": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias2" + ), + b"target/alias2": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/release" + ), + b"target/release": SnapshotBranch( + target_type=TargetType.RELEASE, target=get_releases()[0].id + ), + }, + ), + Snapshot( + # This branch alias chain is going 6 levels deep + branches={ + b"target/alias1": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias2" + ), + b"target/alias2": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias3" + ), + b"target/alias3": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias4" + ), + b"target/alias4": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias5" + ), + b"target/alias5": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias6" + ), + b"target/alias6": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/release" + ), + b"target/release": SnapshotBranch( + target_type=TargetType.RELEASE, target=get_releases()[0].id + ), + }, + ), + Snapshot( + # This branch alias chain is recursive + branches={ + b"target/alias1": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias2" + ), + b"target/alias2": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"target/alias1" + ), + }, + ), + ] + + GRAPHQL_EXTRA_TEST_OBJECTS = { + "snapshot": get_snapshots_with_multiple_alias(), "release": get_releases_with_target(), "revision": get_revisions_with_parents() + get_revisions_with_none_date(), "directory": get_directories_with_nested_path() diff --git a/swh/graphql/tests/functional/test_branch_connection.py b/swh/graphql/tests/functional/test_branch_connection.py --- a/swh/graphql/tests/functional/test_branch_connection.py +++ b/swh/graphql/tests/functional/test_branch_connection.py @@ -6,6 +6,7 @@ import pytest from . import utils +from ..data import get_releases, get_snapshots_with_multiple_alias def get_branches(client, **kwargs) -> tuple: @@ -20,16 +21,14 @@ } nodes { targetType + resolveChain { + text + } name { text } target { __typename - ...on Branch { - name { - text - } - } ...on Revision { swhid } @@ -67,17 +66,105 @@ "swhid": "swh:1:rev:66c7c1cd9673275037140f2abff7b7b11fc9439c", }, "targetType": "revision", + "resolveChain": None, } -def test_get_branches_with_alias(client): +def test_get_branches_with_one_level_alias(client): swhid = "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f" data, _ = get_branches(client, swhid=swhid, first=10, types=["alias"]) node = data["snapshot"]["branches"]["nodes"][0] assert node == { - "name": {"text": "target/alias"}, - "target": {"__typename": "Branch", "name": {"text": "target/revision"}}, - "targetType": "alias", + "name": {"text": "target/alias"}, # original name + "target": { + "__typename": "Revision", + "swhid": "swh:1:rev:66c7c1cd9673275037140f2abff7b7b11fc9439c", + }, + "targetType": "revision", + "resolveChain": [{"text": "target/revision"}], + } + + +def test_get_branches_alias_without_a_target(client): + snapshot = get_snapshots_with_multiple_alias()[0] + data, errors = get_branches( + client, + swhid=str(snapshot.swhid()), + first=1, + types=["alias"], + nameInclude="alias1", + ) + node = data["snapshot"]["branches"]["nodes"][0] + assert node == { + "name": {"text": "target/alias1"}, # original name + "target": None, + "targetType": None, + "resolveChain": [{"text": "target/alias2"}, {"text": "target/alias3"}], + } + + +def test_get_branches_with_multiple_alias_redirects(client): + snapshot = get_snapshots_with_multiple_alias()[1] + data, errors = get_branches( + client, + swhid=str(snapshot.swhid()), + first=1, + types=["alias"], + nameInclude="alias1", + ) + node = data["snapshot"]["branches"]["nodes"][0] + assert node == { + "name": {"text": "target/alias1"}, # original name + "target": {"__typename": "Release", "swhid": str(get_releases()[0].swhid())}, + "targetType": "release", + "resolveChain": [{"text": "target/alias2"}, {"text": "target/release"}], + } + + +def test_get_branches_with_too_many_alias_redirects(client): + snapshot = get_snapshots_with_multiple_alias()[2] + data, errors = get_branches( + client, + swhid=str(snapshot.swhid()), + first=1, + types=["alias"], + nameInclude="alias1", + ) + node = data["snapshot"]["branches"]["nodes"][0] + assert node == { + "name": {"text": "target/alias1"}, # original name + "target": None, + "targetType": None, + "resolveChain": [ + {"text": "target/alias2"}, + {"text": "target/alias3"}, + {"text": "target/alias4"}, + {"text": "target/alias5"}, + {"text": "target/alias6"}, + {"text": "target/release"}, + ], + } + + +def test_get_branches_with_infinite_alias_redirect(client): + snapshot = get_snapshots_with_multiple_alias()[3] + data, errors = get_branches( + client, + swhid=str(snapshot.swhid()), + first=1, + types=["alias"], + nameInclude="alias1", + ) + node = data["snapshot"]["branches"]["nodes"][0] + assert node == { + "name": {"text": "target/alias1"}, # original name + "target": None, + "targetType": None, + "resolveChain": [ + {"text": "target/alias2"}, + {"text": "target/alias1"}, + {"text": "target/alias2"}, + ], } @@ -96,6 +183,7 @@ data, _ = get_branches(client, swhid=swhid, first=10, types=[filter_type]) assert len(data["snapshot"]["branches"]["nodes"]) == count for node in data["snapshot"]["branches"]["nodes"]: + assert node["resolveChain"] is None assert node["target"]["__typename"] == target_type assert node["target"]["swhid"].startswith(swhid_pattern)