diff --git a/config/dev.yml b/config/dev.yml
--- a/config/dev.yml
+++ b/config/dev.yml
@@ -2,6 +2,10 @@
   cls: remote
   url: http://moma.internal.softwareheritage.org:5002
 
+search:
+  cls: remote
+  url: http://moma.internal.softwareheritage.org:5010
+
 debug: yes
 
 server-type: asgi
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,3 +2,4 @@
 swh.core[http] >= 0.3  # [http] is required by swh.core.pytest_plugin
 swh.storage
 swh.model
+swh.search
diff --git a/swh/graphql/backends/search.py b/swh/graphql/backends/search.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/backends/search.py
@@ -0,0 +1,35 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.graphql import server
+
+
+class Search:
+    """
+    Thin wrapper around the search backend returned by server.get_search()
+    """
+
+    def __init__(self):
+        self.search = server.get_search()
+
+    def _is_query_using_advanced_language(self, query: str) -> bool:
+        # Placeholder: always reports that the query is a plain pattern
+        return False
+
+    def do_search(self, query: str, after=None, first=50):
+        """
+        Search origins, using the query as the URL pattern
+
+        'after' is forwarded as the page token and 'first' as the page size
+        """
+        return self.search.origin_search(
+            url_pattern=query,
+            page_token=after,
+            limit=first,
+        )
+
+    def search_using_query_language(self, query):
+        # Placeholder: returns the query unchanged
+        return query
diff --git a/swh/graphql/resolvers/origin.py b/swh/graphql/resolvers/origin.py
--- a/swh/graphql/resolvers/origin.py
+++ b/swh/graphql/resolvers/origin.py
@@ -4,13 +4,26 @@
 # See top-level LICENSE file for more information
 
 from swh.graphql.backends import archive
+from swh.model.model import Origin
 from swh.storage.interface import PagedResult
 
 from .base_connection import BaseConnection
 from .base_node import BaseSWHNode
+from .search import SearchResultNode
 
 
-class OriginNode(BaseSWHNode):
+class BaseOriginNode(BaseSWHNode):
+    """
+    Base resolver for all the origin nodes
+    """
+
+    def is_type_of(self):
+        # is_type_of is required only when resolving a UNION type
+        # This is for ariadne to return the right type
+        return "Origin"
+
+
+class OriginNode(BaseOriginNode):
     """
     Node resolver for an origin requested directly with its URL
     """
@@ -19,12 +32,28 @@
         return archive.Archive().get_origin(self.kwargs.get("url"))
 
 
+class TargetOriginNode(BaseOriginNode):
+    """
+    Node resolver for an origin requested as a target
+    """
+
+    obj: SearchResultNode
+
+    def _get_node_data(self):
+        # URL is named as target_hash in SearchResultNode for consistency
+
+        # The origin URL is guaranteed to exist in the archive
+        # So, returning the origin object without any explicit call to the archive
+        # This assumes that the search index and archive are always in sync
+        return Origin(url=self.obj.target_hash)
+
+
 class OriginConnection(BaseConnection):
     """
     Connection resolver for the origins
     """
 
-    _node_class = OriginNode
+    _node_class = BaseOriginNode
 
     def _get_paged_result(self) -> PagedResult:
         return archive.Archive().get_origins(
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -6,7 +6,7 @@
 from .content import ContentNode, HashContentNode, TargetContentNode
 from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
 from .directory_entry import DirectoryEntryConnection
-from .origin import OriginConnection, OriginNode
+from .origin import OriginConnection, OriginNode, TargetOriginNode
 from .release import ReleaseNode, TargetReleaseNode
 from .revision import (
     LogRevisionConnection,
@@ -14,7 +14,7 @@
     RevisionNode,
     TargetRevisionNode,
 )
-from .search import ResolveSwhidConnection
+from .search import ResolveSwhidConnection, SearchConnection
 from .snapshot import (
     OriginSnapshotConnection,
     SnapshotNode,
@@ -52,6 +52,7 @@
         "content-by-hash": HashContentNode,
         "dir-entry-dir": TargetDirectoryNode,
         "dir-entry-file": TargetContentNode,
+        "search-result-origin": TargetOriginNode,
         "search-result-snapshot": TargetSnapshotNode,
         "search-result-revision": TargetRevisionNode,
         "search-result-release": TargetReleaseNode,
@@ -75,6 +76,7 @@
         "revision-log": LogRevisionConnection,
         "directory-entries": DirectoryEntryConnection,
         "resolve-swhid": ResolveSwhidConnection,
+        "search": SearchConnection,
     }
     if resolver_type not in mapping:
         raise AttributeError(f"Invalid connection type: {resolver_type}")
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -266,6 +266,13 @@
     return resolver(obj, info, **kw)
 
 
+@query.field("search")
+def search_resolver(obj, info: GraphQLResolveInfo, **kw) -> rs.search.SearchConnection:
+    """Resolve the top-level 'search' query field"""
+    resolver = get_connection_resolver("search")
+    return resolver(obj, info, **kw)
+
+
 # Any other type of resolver
 
 
diff --git a/swh/graphql/resolvers/search.py b/swh/graphql/resolvers/search.py
--- a/swh/graphql/resolvers/search.py
+++ b/swh/graphql/resolvers/search.py
@@ -3,7 +3,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from swh.graphql.backends import archive
+from swh.graphql.backends import archive, search
 from swh.storage.interface import PagedResult
 
 from .base_connection import BaseConnection
@@ -29,3 +29,25 @@
             }
         ]
         return PagedResult(results=results)
+
+
+class SearchConnection(BaseConnection):
+    """
+    Connection resolver for the search results
+    """
+
+    _node_class = SearchResultNode
+
+    def _get_paged_result(self) -> PagedResult:
+        origins = search.Search().do_search(
+            query=self.kwargs.get("query"),
+            after=self._get_after_arg(),
+            first=self._get_first_arg(),
+        )
+        # FIXME hard coding type to origin for now, as it is the
+        # only searchable object
+        # using the name 'target_hash' to be consistent with other objects
+        results = [
+            {"target_hash": ori["url"], "type": "origin"} for ori in origins.results
+        ]
+        return PagedResult(results=results, next_page_token=origins.next_page_token)
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -921,6 +921,10 @@
   target: SearchResultTarget
 }
 
+enum SearchType {
+  origin
+}
+
 """
 The query root of the GraphQL interface.
 """
@@ -1040,4 +1044,24 @@
     """
     swhid: SWHID!
   ): SearchResultConnection!
+
+  """
+  Search in SWH
+  """
+  search(
+    """
+    String to search for
+    """
+    query: String!
+
+    """
+    Returns the first _n_ elements from the list
+    """
+    first: Int!
+
+    """
+    Returns the page after the cursor
+    """
+    after: String
+  ): SearchResultConnection!
 }
diff --git a/swh/graphql/server.py b/swh/graphql/server.py
--- a/swh/graphql/server.py
+++ b/swh/graphql/server.py
@@ -7,19 +7,29 @@
 from typing import Any, Dict, Optional
 
 from swh.core import config
-from swh.storage import get_storage as get_swhstorage
+from swh.search import get_search as get_swh_search
+from swh.storage import get_storage as get_swh_storage
 
 graphql_cfg = None
 storage = None
+search = None
 
 
 def get_storage():
     global storage
     if not storage:
-        storage = get_swhstorage(**graphql_cfg["storage"])
+        storage = get_swh_storage(**graphql_cfg["storage"])
     return storage
 
 
+def get_search():
+    """Return the shared search client, created on first call"""
+    global search
+    if not search:
+        search = get_swh_search(**graphql_cfg["search"])
+    return search
+
+
 def load_and_check_config(config_path: Optional[str]) -> Dict[str, Any]:
     """Check the minimal configuration is set to run the api or raise an
     error explanation.