def is_object_available(self, object_id: bytes, object_type: ObjectType) -> bool:
    """Tell whether an archive object of the given type is present in storage.

    Args:
        object_id: identifier of the object; callers pass ``swhid.object_id``
        object_type: kind of object, used to pick the matching ``*_missing``
            storage endpoint

    Returns:
        True when storage does not report ``object_id`` as missing,
        False otherwise.

    Raises:
        KeyError: if ``object_type`` has no entry in the lookup table below.
    """
    missing_lookup = {
        ObjectType.CONTENT: self.storage.content_missing_per_sha1_git,
        ObjectType.DIRECTORY: self.storage.directory_missing,
        ObjectType.RELEASE: self.storage.release_missing,
        ObjectType.REVISION: self.storage.revision_missing,
        ObjectType.SNAPSHOT: self.storage.snapshot_missing,
    }
    # The ``*_missing`` endpoints are typed as returning an iterable of ids.
    # When a backend hands back a lazy iterator/generator, ``not <iterator>``
    # is always False and every object would look available, so the result
    # is materialised before testing for emptiness.
    missing_ids = list(missing_lookup[object_type]([object_id]))
    return not missing_ids
def _get_node_data(self):
    """Return the directory designated by the ``swhid`` argument, or None.

    None is returned when the SWHID is not a directory SWHID or when the
    archive does not hold the object.
    """
    swhid = self.kwargs.get("swhid")
    if swhid.object_type != ObjectType.DIRECTORY:
        return None
    # _get_directory_by_id makes no backend call, so existence has to be
    # checked explicitly before building the node data
    if not archive.Archive().is_object_available(
        swhid.object_id, swhid.object_type
    ):
        return None
    return self._get_directory_by_id(swhid.object_id)
@search_result.field("target")
def search_result_target_resolver(
    obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw
):
    """Resolve ``SearchResult.target``.

    The node resolver is picked from the factory using the result's ``type``
    (key ``search-result-<type>``).
    """
    return get_node_resolver(f"search-result-{obj.type}")(obj, info, **kw)


@query.field("searchSwhid")
def search_swhid_resolver(
    obj, info: GraphQLResolveInfo, **kw
) -> rs.search.SearchSwhidConnection:
    """Resolve the ``searchSwhid`` query through the ``search-swhid`` connection."""
    return get_connection_resolver("search-swhid")(obj, info, **kw)
class SearchResultNode(BaseNode):
    """Node for a single entry of a search result connection."""


class SearchSwhidConnection(BaseConnection):
    """Connection that resolves one SWHID into at most one search result."""

    _node_class = SearchResultNode

    def _get_paged_result(self) -> PagedResult:
        """Return a one-item page when the SWHID exists, an empty page otherwise."""
        swhid = self.kwargs.get("swhid")
        found = archive.Archive().is_object_available(
            swhid.object_id, swhid.object_type
        )
        if not found:
            return PagedResult(results=[])
        hit = {
            "target_hash": swhid.object_id,
            "type": swhid.object_type.name.lower(),
            # a SWHID lookup is an exact lookup, so the match is always 100%
            "match": 100,
        }
        return PagedResult(results=[hit])
""" + List of SearchResult edges + """ + edges: [SearchResultEdge] + + """ + List of SearchResult objects + """ + nodes: [SearchResult] + + """ + Information for pagination + """ + pageInfo: PageInfo! + + """ + Total number of result objects in the connection + """ + totalCount: Int +} + +""" +Edge in SearchResult connection +""" +type SearchResultEdge { + """ + Cursor to request the next page after the item + """ + cursor: String! + + """ + SearchResult object + """ + node: SearchResult +} + +union SearchResultTarget = Origin | Revision | Release | Content | Directory | Snapshot + +enum SearchResultTargetType { + origin + revision + release + content + directory + snapshot +} + +""" +A SearchResult object +""" +type SearchResult { + """ + Result target type + """ + type: SearchResultTargetType + + """ + Result target object + """ + target: SearchResultTarget + + """ + Result match percentage to the search term + """ + match: Int +} + """ The query root of the GraphQL interface. """ @@ -903,4 +974,14 @@ """ swhid: SWHID! ): Content + + """ + Resolve the given SWHID to an object + """ + searchSwhid( + """ + SWHID to look for + """ + swhid: SWHID! + ): SearchResultConnection! 
# Well-formed SWHIDs, one per object type, used by test_missing_swhid
_ABSENT_SWHIDS = (
    "swh:1:rel:0949d7a8c96347dba09be8d79085b8207f345412",
    "swh:1:rev:0949d7a8c96347dba09be8d79085b8207f345412",
    "swh:1:dir:0949d7a8c96347dba09be8d79085b8207f345412",
    "swh:1:cnt:0949d7a8c96347dba09be8d79085b8207f345412",
    "swh:1:snp:0949d7a8c96347dba09be8d79085b8207f345412",
)


def test_invalid_swhid(client):
    """A malformed SWHID is answered with exactly one "invalid syntax" error."""
    query_str = """
    {
      searchSwhid(swhid: "swh:1:dir:dae0d245988b472abd30a4f968b919d0019b6c7") {
        nodes {
          type
        }
      }
    }
    """
    errors = utils.get_error_response(client, query_str)
    # the API reports an invalid SWHID as an error rather than an empty result
    assert len(errors) == 1
    message = errors[0]["message"]
    assert "Invalid SWHID: invalid syntax" in message


@pytest.mark.parametrize("swhid", list(_ABSENT_SWHIDS))
def test_missing_swhid(client, swhid):
    """A valid SWHID with no matching object yields an empty node list."""
    query_str = """
    {
      searchSwhid(swhid: "%s") {
        nodes {
          type
        }
      }
    }
    """
    response, _ = utils.get_query_response(client, query_str % swhid)
    expected = {"searchSwhid": {"nodes": []}}
    assert response == expected
def _single_hit(typename, swhid):
    """Build the response expected when the search returns exactly one node."""
    return {
        "searchSwhid": {
            "nodes": [
                {
                    "match": 100,
                    "target": {"__typename": typename, "swhid": swhid},
                    "type": typename.lower(),
                }
            ]
        }
    }


@pytest.mark.parametrize("snapshot", get_snapshots())
def test_snapshot_swhid_search(client, snapshot):
    """Each snapshot of the test data set is found through its own SWHID."""
    query_str = """
    {
      searchSwhid(swhid: "%s") {
        nodes {
          type
          match
          target {
            __typename
            ... on Snapshot {
              swhid
            }
          }
        }
      }
    }
    """
    swhid = str(snapshot.swhid())
    response, _ = utils.get_query_response(client, query_str % swhid)
    assert response == _single_hit("Snapshot", swhid)


@pytest.mark.parametrize("revision", get_revisions())
def test_revision_swhid_search(client, revision):
    """Each revision of the test data set is found through its own SWHID."""
    query_str = """
    {
      searchSwhid(swhid: "%s") {
        nodes {
          type
          match
          target {
            __typename
            ... on Revision {
              swhid
            }
          }
        }
      }
    }
    """
    swhid = str(revision.swhid())
    response, _ = utils.get_query_response(client, query_str % swhid)
    assert response == _single_hit("Revision", swhid)


@pytest.mark.parametrize("release", get_releases())
def test_release_swhid_search(client, release):
    """Each release of the test data set is found through its own SWHID."""
    query_str = """
    {
      searchSwhid(swhid: "%s") {
        nodes {
          type
          match
          target {
            __typename
            ... on Release {
              swhid
            }
          }
        }
      }
    }
    """
    swhid = str(release.swhid())
    response, _ = utils.get_query_response(client, query_str % swhid)
    assert response == _single_hit("Release", swhid)


@pytest.mark.parametrize("directory", get_directories())
def test_directory_swhid_search(client, directory):
    """Each directory of the test data set is found through its own SWHID."""
    query_str = """
    {
      searchSwhid(swhid: "%s") {
        nodes {
          type
          match
          target {
            __typename
            ... on Directory {
              swhid
            }
          }
        }
      }
    }
    """
    swhid = str(directory.swhid())
    response, _ = utils.get_query_response(client, query_str % swhid)
    assert response == _single_hit("Directory", swhid)