diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py index 644c29c..cee5091 100644 --- a/swh/graphql/backends/archive.py +++ b/swh/graphql/backends/archive.py @@ -1,77 +1,79 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.graphql import server class Archive: def __init__(self): self.storage = server.get_storage() def get_origin(self, url): return self.storage.origin_get([url])[0] def get_origins(self, after=None, first=50, url_pattern=None): # STORAGE-TODO # Make them a single function in the backend if url_pattern is None: return self.storage.origin_list(page_token=after, limit=first) return self.storage.origin_search( url_pattern=url_pattern, page_token=after, limit=first ) def get_origin_visits(self, origin_url, after=None, first=50): return self.storage.origin_visit_get(origin_url, page_token=after, limit=first) def get_origin_visit(self, origin_url, visit_id): return self.storage.origin_visit_get_by(origin_url, visit_id) def get_origin_latest_visit(self, origin_url): return self.storage.origin_visit_get_latest(origin_url) def get_visit_status(self, origin_url, visit_id, after=None, first=50): return self.storage.origin_visit_status_get( origin_url, visit_id, page_token=after, limit=first ) def get_latest_visit_status(self, origin_url, visit_id): return self.storage.origin_visit_status_get_latest(origin_url, visit_id) def get_origin_snapshots(self, origin_url): return self.storage.origin_snapshot_get_all(origin_url) def is_snapshot_available(self, snapshot_ids): return not self.storage.snapshot_missing(snapshot_ids) - def get_snapshot_branches(self, snapshot, after, first, target_types, name_include): + def get_snapshot_branches( + self, snapshot, after=b"", first=50, target_types=[], name_include=None + ): return self.storage.snapshot_get_branches( snapshot, branches_from=after, branches_count=first, target_types=target_types, branch_name_include_substring=name_include, ) def get_revisions(self, revision_ids): return self.storage.revision_get(revision_ids=revision_ids) def get_revision_log(self, revision_ids, after=None, first=50): return self.storage.revision_log(revisions=revision_ids, limit=first) def get_releases(self, release_ids): return self.storage.release_get(releases=release_ids) def is_directory_available(self, directory_ids): return not self.storage.directory_missing(directory_ids) def get_directory_entries(self, directory_id, after=None, first=50): return self.storage.directory_get_entries( directory_id, limit=first, page_token=after ) def get_content(self, content_id): # FIXME, only for tests return self.storage.content_find({"sha1_git": content_id}) diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py index 044464e..964508b 100644 --- a/swh/graphql/resolvers/content.py +++ b/swh/graphql/resolvers/content.py @@ -1,58 +1,59 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Union from swh.graphql.backends import archive from .base_node import BaseSWHNode from .directory_entry import DirectoryEntryNode from .release import BaseReleaseNode +from .snapshot_branch import SnapshotBranchNode class BaseContentNode(BaseSWHNode): """ Base resolver for all the content nodes """ def _get_content_by_id(self, content_id): content = archive.Archive().get_content(content_id) return content[0] if content else None @property def checksum(self): # FIXME, return a Node object return {k: v.hex() for (k, v) in self._node.hashes().items()} @property def id(self): return self._node.sha1_git def is_type_of(self): # is_type_of is required only when resolving a UNION type # This is for ariadne to return the right type return "Content" class ContentNode(BaseContentNode): """ Node resolver for a content requested directly with its SWHID """ def _get_node_data(self): return self._get_content_by_id(self.kwargs.get("swhid").object_id) class TargetContentNode(BaseContentNode): """ Node resolver for a content requested from a directory entry or from a release target """ - obj: Union[DirectoryEntryNode, BaseReleaseNode] + obj: Union[DirectoryEntryNode, BaseReleaseNode, SnapshotBranchNode] def _get_node_data(self): content_id = self.obj.targetHash return self._get_content_by_id(content_id) diff --git a/swh/graphql/resolvers/directory.py b/swh/graphql/resolvers/directory.py index 15d41ad..d6bc32a 100644 --- a/swh/graphql/resolvers/directory.py +++ b/swh/graphql/resolvers/directory.py @@ -1,62 +1,70 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Union + from swh.graphql.backends import archive from swh.model.model import Directory from .base_node import BaseSWHNode +from .release import BaseReleaseNode from .revision import BaseRevisionNode +from .snapshot_branch import SnapshotBranchNode class BaseDirectoryNode(BaseSWHNode): """ Base resolver for all the directory nodes """ def _get_directory_by_id(self, directory_id): # Return a Directory model object # entries is initialized as empty # Same pattern is used in snapshot return Directory(id=directory_id, entries=()) def is_type_of(self): return "Directory" class DirectoryNode(BaseDirectoryNode): """ Node resolver for a directory requested directly with its SWHID """ def _get_node_data(self): directory_id = self.kwargs.get("swhid").object_id # path = "" if archive.Archive().is_directory_available([directory_id]): # _get_directory_by_id is not making any backend call # hence the is_directory_available validation return self._get_directory_by_id(directory_id) return None class RevisionDirectoryNode(BaseDirectoryNode): """ Node resolver for a directory requested from a revision """ obj: BaseRevisionNode def _get_node_data(self): # self.obj.directorySWHID is the requested directory SWHID directory_id = self.obj.directorySWHID.object_id return self._get_directory_by_id(directory_id) class TargetDirectoryNode(BaseDirectoryNode): """ Node resolver for a directory requested as a target """ + from .directory_entry import DirectoryEntryNode + + obj: Union[SnapshotBranchNode, BaseReleaseNode, DirectoryEntryNode] + def _get_node_data(self): return self._get_directory_by_id(self.obj.targetHash) diff --git a/swh/graphql/resolvers/directory_entry.py b/swh/graphql/resolvers/directory_entry.py index c87c2de..5593fbd 100644 --- a/swh/graphql/resolvers/directory_entry.py +++ b/swh/graphql/resolvers/directory_entry.py @@ -1,40 +1,41 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.graphql.backends import archive from swh.graphql.utils import utils from .base_connection import BaseConnection from .base_node import BaseNode -from .directory import BaseDirectoryNode class DirectoryEntryNode(BaseNode): """ Node resolver for a directory entry """ @property def targetHash(self): # To support the schema naming convention return self._node.target class DirectoryEntryConnection(BaseConnection): """ Connection resolver for entries in a directory """ + from .directory import BaseDirectoryNode + obj: BaseDirectoryNode _node_class = DirectoryEntryNode def _get_paged_result(self): # FIXME, using dummy(local) pagination, move pagination to backend # To remove localpagination, just drop the paginated call # STORAGE-TODO entries = ( archive.Archive().get_directory_entries(self.obj.swhid.object_id).results ) return utils.paginated(entries, self._get_first_arg(), self._get_after_arg()) diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py index 286573e..388128f 100644 --- a/swh/graphql/resolvers/resolver_factory.py +++ b/swh/graphql/resolvers/resolver_factory.py @@ -1,65 +1,73 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from .content import ContentNode, TargetContentNode from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode from .directory_entry import DirectoryEntryConnection from .origin import OriginConnection, OriginNode from .release import ReleaseNode, TargetReleaseNode from .revision import ( LogRevisionConnection, ParentRevisionConnection, RevisionNode, TargetRevisionNode, ) -from .snapshot import OriginSnapshotConnection, SnapshotNode, VisitSnapshotNode +from .snapshot import ( + OriginSnapshotConnection, + SnapshotNode, + TargetSnapshotNode, + VisitSnapshotNode, +) from .snapshot_branch import SnapshotBranchConnection from .visit import LatestVisitNode, OriginVisitConnection, OriginVisitNode from .visit_status import LatestVisitStatusNode, VisitStatusConnection def get_node_resolver(resolver_type): # FIXME, replace with a proper factory method mapping = { "origin": OriginNode, "visit": OriginVisitNode, "latest-visit": LatestVisitNode, "latest-status": LatestVisitStatusNode, "visit-snapshot": VisitSnapshotNode, "snapshot": SnapshotNode, "branch-revision": TargetRevisionNode, "branch-release": TargetReleaseNode, + "branch-directory": TargetDirectoryNode, + "branch-content": TargetContentNode, + "branch-snapshot": TargetSnapshotNode, "revision": RevisionNode, "revision-directory": RevisionDirectoryNode, "release": ReleaseNode, "release-revision": TargetRevisionNode, "release-release": TargetReleaseNode, "release-directory": TargetDirectoryNode, "release-content": TargetContentNode, "directory": DirectoryNode, "content": ContentNode, "dir-entry-dir": TargetDirectoryNode, "dir-entry-file": TargetContentNode, } if resolver_type not in mapping: raise AttributeError(f"Invalid node type: {resolver_type}") return mapping[resolver_type] def get_connection_resolver(resolver_type): # FIXME, replace with a proper factory method mapping = { "origins": OriginConnection, "origin-visits": OriginVisitConnection, "origin-snapshots": OriginSnapshotConnection, "visit-status": VisitStatusConnection, "snapshot-branches": SnapshotBranchConnection, "revision-parents": ParentRevisionConnection, "revision-log": LogRevisionConnection, "directory-entries": DirectoryEntryConnection, } if resolver_type not in mapping: raise AttributeError(f"Invalid connection type: {resolver_type}") return mapping[resolver_type] diff --git a/swh/graphql/resolvers/snapshot.py b/swh/graphql/resolvers/snapshot.py index 1fe4d66..c39ac34 100644 --- a/swh/graphql/resolvers/snapshot.py +++ b/swh/graphql/resolvers/snapshot.py @@ -1,69 +1,90 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Union + from swh.graphql.backends import archive from swh.graphql.utils import utils from swh.model.model import Snapshot from .base_connection import BaseConnection from .base_node import BaseSWHNode from .origin import OriginNode from .visit_status import BaseVisitStatusNode class BaseSnapshotNode(BaseSWHNode): """ Base resolver for all the snapshot nodes """ def _get_snapshot_by_id(self, snapshot_id): # Return a Snapshot model object # branches is initialized as empty # Same pattern is used in directory return Snapshot(id=snapshot_id, branches={}) + def is_type_of(self): + # is_type_of is required only when resolving a UNION type + # This is for ariadne to return the right type + return "Snapshot" + class SnapshotNode(BaseSnapshotNode): """ Node resolver for a snapshot requested directly with its SWHID """ def _get_node_data(self): """ """ snapshot_id = self.kwargs.get("swhid").object_id if archive.Archive().is_snapshot_available([snapshot_id]): return self._get_snapshot_by_id(snapshot_id) return None class VisitSnapshotNode(BaseSnapshotNode): """ Node resolver for a snapshot requested from a visit-status """ obj: BaseVisitStatusNode def _get_node_data(self): # self.obj.snapshotSWHID is the requested snapshot SWHID snapshot_id = self.obj.snapshotSWHID.object_id return self._get_snapshot_by_id(snapshot_id) +class TargetSnapshotNode(BaseSnapshotNode): + """ + Node resolver for a snapshot requested as a target + """ + + from .snapshot_branch import SnapshotBranchNode + + obj: Union[BaseVisitStatusNode, SnapshotBranchNode] + + def _get_node_data(self): + snapshot_id = self.obj.targetHash + return self._get_snapshot_by_id(snapshot_id) + + class OriginSnapshotConnection(BaseConnection): """ Connection resolver for the snapshots in an origin """ obj: OriginNode _node_class = BaseSnapshotNode def _get_paged_result(self): results = archive.Archive().get_origin_snapshots(self.obj.url) snapshots = [Snapshot(id=snapshot, branches={}) for snapshot in results] # FIXME, using dummy(local) pagination, move pagination to backend # To remove localpagination, just drop the paginated call # STORAGE-TODO return utils.paginated(snapshots, self._get_first_arg(), self._get_after_arg()) diff --git a/swh/graphql/resolvers/snapshot_branch.py b/swh/graphql/resolvers/snapshot_branch.py index 78e989f..6f780b4 100644 --- a/swh/graphql/resolvers/snapshot_branch.py +++ b/swh/graphql/resolvers/snapshot_branch.py @@ -1,83 +1,85 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import namedtuple from swh.graphql.backends import archive from swh.graphql.utils import utils from swh.storage.interface import PagedResult from .base_connection import BaseConnection from .base_node import BaseNode -from .snapshot import SnapshotNode class SnapshotBranchNode(BaseNode): """ Node resolver for a snapshot branch """ # target field for this Node is a UNION type # It is resolved in the top level (resolvers.resolvers.py) def _get_node_from_data(self, node_data): # node_data is not a dict in this case # overriding to support this special data structure # STORAGE-TODO; return an object in the normal format branch_name, branch_obj = node_data node = { "name": branch_name, "type": branch_obj.target_type.value, "target": branch_obj.target, } return namedtuple("NodeObj", node.keys())(*node.values()) @property def targetHash(self): # To support the schema naming convention return self._node.target class SnapshotBranchConnection(BaseConnection): """ Connection resolver for the branches in a snapshot """ + from .snapshot import SnapshotNode + obj: SnapshotNode _node_class = SnapshotBranchNode def _get_paged_result(self): # self.obj.swhid is the snapshot SWHID result = archive.Archive().get_snapshot_branches( self.obj.swhid.object_id, after=self._get_after_arg(), first=self._get_first_arg(), target_types=self.kwargs.get("types"), - name_include=self.kwargs.get("nameInclude"), + name_include=self._get_name_include_arg(), ) # FIXME Cursor must be a hex to be consistent with # the base class, hack to make that work end_cusrsor = ( - result["next_branch"].hex() if result["next_branch"] is not None else None + result["next_branch"] if result["next_branch"] is not None else None ) # FIXME, this pagination is not consistent with other connections # FIX in swh-storage to return PagedResult # STORAGE-TODO return PagedResult( results=result["branches"].items(), next_page_token=end_cusrsor ) def _get_after_arg(self): # Snapshot branch is using a different cursor; logic to handle that - - # FIXME Cursor must be a hex to be consistent with - # the base class, hack to make that work after = utils.get_decoded_cursor(self.kwargs.get("after", "")) - return bytes.fromhex(after) + return after.encode() if after else b"" + + def _get_name_include_arg(self): + name_include = self.kwargs.get("nameInclude", None) + return name_include.encode() if name_include else None def _get_index_cursor(self, index: int, node: SnapshotBranchNode): # Snapshot branch is using a different cursor, hence the override return utils.get_encoded_cursor(node.name.hex()) diff --git a/swh/graphql/tests/functional/test_branch_connection.py b/swh/graphql/tests/functional/test_branch_connection.py new file mode 100644 index 0000000..d3cc92b --- /dev/null +++ b/swh/graphql/tests/functional/test_branch_connection.py @@ -0,0 +1,204 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + +from .utils import get_query_response + + +def test_get(client): + query_str = """ + { + snapshot(swhid: "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f") { + branches(first:10) { + nodes { + type + target { + __typename + ...on Revision { + swhid + } + ...on Release { + swhid + } + ...on Content { + swhid + } + ...on Directory { + swhid + } + ...on Snapshot { + swhid + } + } + } + } + } + } + """ + data, errors = get_query_response(client, query_str) + # Alias type is not handled at the moment, hence the error + assert len(errors) == 1 + assert errors[0]["message"] == "Invalid node type: branch-alias" + assert len(data["snapshot"]["branches"]["nodes"]) == 5 + + +@pytest.mark.parametrize( + "filter_type, count, target_type, swhid_pattern", + [ + ("revision", 1, "Revision", "swh:1:rev"), + ("release", 1, "Release", "swh:1:rel"), + ("directory", 1, "Directory", "swh:1:dir"), + ("content", 0, "Content", "swh:1:cnt"), + ("snapshot", 1, "Snapshot", "swh:1:snp"), + ], +) +def test_get_type_filter(client, filter_type, count, target_type, swhid_pattern): + query_str = ( + """ + { + snapshot(swhid: "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f") { + branches(first:10, types: [%s]) { + nodes { + type + target { + __typename + ...on Revision { + swhid + } + ...on Release { + swhid + } + ...on Content { + swhid + } + ...on Directory { + swhid + } + ...on Snapshot { + swhid + } + } + } + } + } + } + """ + % filter_type + ) + data, _ = get_query_response(client, query_str) + + assert len(data["snapshot"]["branches"]["nodes"]) == count + for node in data["snapshot"]["branches"]["nodes"]: + assert node["target"]["__typename"] == target_type + assert node["target"]["swhid"].startswith(swhid_pattern) + + +@pytest.mark.parametrize( + "filter_types, count", + [ + ("revision, release", 2), + ("revision, snapshot, release", 3), + ], +) +def test_get_type_filter_multiple(client, filter_types, count): + query_str = ( + """ + { + snapshot(swhid: "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f") { + branches(first:10, types: [%s]) { + nodes { + type + } + } + } + }""" + % filter_types + ) + data, _ = get_query_response(client, query_str) + assert len(data["snapshot"]["branches"]["nodes"]) == count + + +@pytest.mark.parametrize("name", ["rel", "rev", "non-exist"]) +def test_get_name_include_filter(client, name): + query_str = ( + """ + { + snapshot(swhid: "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f") { + branches(first:10, nameInclude: "%s") { + nodes { + name { + text + } + } + } + } + }""" + % name + ) + data, _ = get_query_response(client, query_str) + for node in data["snapshot"]["branches"]["nodes"]: + assert name in node["name"]["text"] + + +@pytest.mark.parametrize("count", [1, 2]) +def test_get_first_arg(client, count): + query_str = ( + """ + { + snapshot(swhid: "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f") { + branches(first: %s) { + nodes { + type + } + } + } + }""" + % count + ) + data, _ = get_query_response(client, query_str) + assert len(data["snapshot"]["branches"]["nodes"]) == count + + +def test_get_after_arg(client): + query_str = """ + { + snapshot(swhid: "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f") { + branches(first: 1) { + pageInfo { + endCursor + } + nodes { + name { + text + } + } + } + } + }""" + first_data, _ = get_query_response(client, query_str) + end_cursor = first_data["snapshot"]["branches"]["pageInfo"]["endCursor"] + node_name = first_data["snapshot"]["branches"]["nodes"][0]["name"]["text"] + + query_str = ( + """ + { + snapshot(swhid: "swh:1:snp:0e7f84ede9a254f2cd55649ad5240783f557e65f") { + branches(first: 3, after: "%s") { + nodes { + type + name { + text + } + } + } + } + }""" + % end_cursor + ) + second_data, _ = get_query_response(client, query_str) + assert len(second_data["snapshot"]["branches"]["nodes"]) == 3 + for node in second_data["snapshot"]["branches"]["nodes"]: + assert node["name"]["text"] > node_name