Page MenuHomeSoftware Heritage

D8260.id29899.diff
No OneTemporary

D8260.id29899.diff

diff --git a/config/dev.yml b/config/dev.yml
--- a/config/dev.yml
+++ b/config/dev.yml
@@ -2,6 +2,10 @@
cls: remote
url: http://moma.internal.softwareheritage.org:5002
+search:
+ cls: remote
+ url: http://moma.internal.softwareheritage.org:5010
+
debug: yes
server-type: asgi
diff --git a/config/staging.yml b/config/staging.yml
--- a/config/staging.yml
+++ b/config/staging.yml
@@ -2,6 +2,10 @@
cls: remote
url: http://webapp.internal.staging.swh.network:5002
+search:
+ cls: remote
+ url: http://webapp.internal.staging.swh.network:5010
+
debug: yes
server-type: wsgi
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,3 +2,4 @@
swh.core[http] >= 0.3 # [http] is required by swh.core.pytest_plugin
swh.storage
swh.model
+swh.search
diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py
--- a/swh/graphql/backends/archive.py
+++ b/swh/graphql/backends/archive.py
@@ -14,15 +14,8 @@
def get_origin(self, url):
return self.storage.origin_get([url])[0]
- def get_origins(self, after=None, first=50, url_pattern=None):
- # STORAGE-TODO
- # Make them a single function in the backend
- if url_pattern is None:
- return self.storage.origin_list(page_token=after, limit=first)
-
- return self.storage.origin_search(
- url_pattern=url_pattern, page_token=after, limit=first
- )
+ def get_origins(self, after=None, first=50):
+ return self.storage.origin_list(page_token=after, limit=first)
def get_origin_visits(self, origin_url, after=None, first=50):
return self.storage.origin_visit_get(origin_url, page_token=after, limit=first)
diff --git a/swh/graphql/backends/search.py b/swh/graphql/backends/search.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/backends/search.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.graphql import server
+
+
+class Search:
+ """
+ Backend wrapper around the search client returned by server.get_search()
+ """
+
+ def __init__(self):
+ self.search = server.get_search()
+
+ def get_origins(self, query: str, after=None, first=50):
+ """
+ Call origin_search on the search client, passing ``query`` as the URL
+ pattern, ``after`` as the page token and ``first`` as the limit
+ """
+ return self.search.origin_search(
+ url_pattern=query,
+ page_token=after,
+ limit=first,
+ )
diff --git a/swh/graphql/resolvers/origin.py b/swh/graphql/resolvers/origin.py
--- a/swh/graphql/resolvers/origin.py
+++ b/swh/graphql/resolvers/origin.py
@@ -3,14 +3,23 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.graphql.backends import archive
+from swh.graphql.backends import archive, search
+from swh.model.model import Origin
from swh.storage.interface import PagedResult
from .base_connection import BaseConnection
from .base_node import BaseSWHNode
+from .search import SearchResultNode
-class OriginNode(BaseSWHNode):
+class BaseOriginNode(BaseSWHNode):
+ """
+ Common base class of the origin node resolvers
+ """
+
+ def is_type_of(self) -> str:
+ # is_type_of is required only when resolving a UNION type
+ # This is for ariadne to return the right type
+ return "Origin"
+
+
+class OriginNode(BaseOriginNode):
"""
Node resolver for an origin requested directly with its URL
"""
@@ -19,16 +28,40 @@
return archive.Archive().get_origin(self.kwargs.get("url"))
+class TargetOriginNode(BaseOriginNode):
+ """
+ Node resolver for an origin requested as a target
+ """
+
+ # parent object of this node: the search result whose target is the origin
+ obj: SearchResultNode
+
+ def _get_node_data(self) -> Origin:
+ # URL is named as target_hash in SearchResultNode for consistency
+
+ # The target origin URL is guaranteed to exist in the archive
+ # Hence returning the origin object without any explicit check in the archive
+ # This assumes that the search index and archive are in sync
+ return Origin(self.obj.target_hash)
+
+
class OriginConnection(BaseConnection):
"""
Connection resolver for the origins
"""
- _node_class = OriginNode
+ _node_class = BaseOriginNode
def _get_paged_result(self) -> PagedResult:
+ # Use the search backend if a urlPattern is given
+ if self.kwargs.get("urlPattern"):
+ origins = search.Search().get_origins(
+ query=self.kwargs.get("urlPattern"),
+ after=self._get_after_arg(),
+ first=self._get_first_arg(),
+ )
+ results = [Origin(ori["url"]) for ori in origins.results]
+ return PagedResult(results=results, next_page_token=origins.next_page_token)
+ # Use the archive backend by default
return archive.Archive().get_origins(
- after=self._get_after_arg(),
- first=self._get_first_arg(),
- url_pattern=self.kwargs.get("urlPattern"),
+ after=self._get_after_arg(), first=self._get_first_arg()
)
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -6,7 +6,7 @@
from .content import ContentNode, HashContentNode, TargetContentNode
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection
-from .origin import OriginConnection, OriginNode
+from .origin import OriginConnection, OriginNode, TargetOriginNode
from .release import ReleaseNode, TargetReleaseNode
from .revision import (
LogRevisionConnection,
@@ -14,7 +14,7 @@
RevisionNode,
TargetRevisionNode,
)
-from .search import ResolveSwhidConnection
+from .search import ResolveSwhidConnection, SearchConnection
from .snapshot import (
OriginSnapshotConnection,
SnapshotNode,
@@ -52,6 +52,7 @@
"content-by-hash": HashContentNode,
"dir-entry-dir": TargetDirectoryNode,
"dir-entry-file": TargetContentNode,
+ "search-result-origin": TargetOriginNode,
"search-result-snapshot": TargetSnapshotNode,
"search-result-revision": TargetRevisionNode,
"search-result-release": TargetReleaseNode,
@@ -75,6 +76,7 @@
"revision-log": LogRevisionConnection,
"directory-entries": DirectoryEntryConnection,
"resolve-swhid": ResolveSwhidConnection,
+ "search": SearchConnection,
}
if resolver_type not in mapping:
raise AttributeError(f"Invalid connection type: {resolver_type}")
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -266,6 +266,14 @@
return resolver(obj, info, **kw)
+@query.field("search")
+def search_resolver(
+    obj, info: GraphQLResolveInfo, **kw
+) -> rs.search.SearchConnection:
+    """
+    Resolver for the ``search`` field of the query root
+
+    Builds the connection registered under the "search" key of the
+    connection resolver factory from the parent object, the resolve info
+    and the field arguments.
+    """
+    # The "search" key maps to SearchConnection, not ResolveSwhidConnection,
+    # hence the return annotation above
+    resolver = get_connection_resolver("search")
+    return resolver(obj, info, **kw)
+
+
# Any other type of resolver
diff --git a/swh/graphql/resolvers/search.py b/swh/graphql/resolvers/search.py
--- a/swh/graphql/resolvers/search.py
+++ b/swh/graphql/resolvers/search.py
@@ -3,7 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.graphql.backends import archive
+from swh.graphql.backends import archive, search
from swh.storage.interface import PagedResult
from .base_connection import BaseConnection
@@ -29,3 +29,22 @@
}
]
return PagedResult(results=results)
+
+
+class SearchConnection(BaseConnection):
+ """
+ Connection resolver for the search results
+ """
+
+ _node_class = SearchResultNode
+
+ def _get_paged_result(self) -> PagedResult:
+ origins = search.Search().get_origins(
+ query=self.kwargs.get("query"),
+ after=self._get_after_arg(),
+ first=self._get_first_arg(),
+ )
+
+ # FIXME hard coding type to origin for now, as it is the only searchable object
+ # using the name 'target_hash' for URL to be consistent with other objects
+ results = [
+ {"target_hash": ori["url"], "type": "origin"} for ori in origins.results
+ ]
+ return PagedResult(results=results, next_page_token=origins.next_page_token)
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -921,6 +921,10 @@
target: SearchResultTarget
}
+# NOTE(review): SearchType is not referenced by the `search` field added in
+# this diff; confirm where it is meant to be used
+enum SearchType {
+ origin
+}
+
"""
The query root of the GraphQL interface.
"""
@@ -1040,4 +1044,24 @@
"""
swhid: SWHID!
): SearchResultConnection!
+
+ """
+ Search in SWH
+ """
+ search(
+ """
+ String to search for
+ """
+ query: String!
+
+ """
+ Returns the first _n_ elements from the list
+ """
+ first: Int!
+
+ """
+ Returns the page after the cursor
+ """
+ after: String
+ ): SearchResultConnection!
}
diff --git a/swh/graphql/server.py b/swh/graphql/server.py
--- a/swh/graphql/server.py
+++ b/swh/graphql/server.py
@@ -7,19 +7,28 @@
from typing import Any, Dict, Optional
from swh.core import config
-from swh.storage import get_storage as get_swhstorage
+from swh.search import get_search as get_swh_search
+from swh.storage import get_storage as get_swh_storage
graphql_cfg = None
storage = None
+search = None
def get_storage():
global storage
if not storage:
- storage = get_swhstorage(**graphql_cfg["storage"])
+ storage = get_swh_storage(**graphql_cfg["storage"])
return storage
+def get_search():
+ """
+ Return the module-level search client, creating it from
+ graphql_cfg["search"] when it is not set yet
+ """
+ global search
+ if not search:
+ search = get_swh_search(**graphql_cfg["search"])
+ return search
+
+
def load_and_check_config(config_path: Optional[str]) -> Dict[str, Any]:
"""Check the minimal configuration is set to run the api or raise an
error explanation.
diff --git a/swh/graphql/tests/conftest.py b/swh/graphql/tests/conftest.py
--- a/swh/graphql/tests/conftest.py
+++ b/swh/graphql/tests/conftest.py
@@ -10,14 +10,15 @@
from swh.graphql import server as app_server
from swh.graphql.app import schema
-from swh.storage import get_storage as get_swhstorage
+from swh.search import get_search as get_swh_search
+from swh.storage import get_storage as get_swh_storage
-from .data import populate_dummy_data
+from .data import populate_dummy_data, populate_search_data
@pytest.fixture
def storage():
- storage = get_swhstorage(cls="memory")
+ storage = get_swh_storage(cls="memory")
# set the global var to use the in-memory storage
app_server.storage = storage
# populate the in-memory storage
@@ -26,7 +27,18 @@
@pytest.fixture
-def test_app(storage):
+def search():
+    """
+    In-memory search backend for the tests
+
+    The instance is installed as the global search client of the app server
+    and filled through populate_search_data before being returned.
+    """
+    # pass cls by keyword, as the storage fixture and server.get_search do
+    search = get_swh_search(cls="memory")
+    # set the global var to use the in-memory search
+    app_server.search = search
+    search.initialize()
+    # populate the in-memory search
+    populate_search_data(search)
+    return search
+
+
+@pytest.fixture
+def test_app(storage, search):
app = Flask(__name__)
@app.route("/", methods=["POST"])
diff --git a/swh/graphql/tests/data.py b/swh/graphql/tests/data.py
--- a/swh/graphql/tests/data.py
+++ b/swh/graphql/tests/data.py
@@ -12,6 +12,10 @@
method(objects)
+def populate_search_data(search):
+ """
+ Pass the URL of every origin returned by get_origins() to
+ search.origin_update
+ """
+ search.origin_update({"url": origin.url} for origin in get_origins())
+
+
def get_origins():
return swh_model_data.ORIGINS
diff --git a/swh/graphql/tests/functional/test_search.py b/swh/graphql/tests/functional/test_search.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/tests/functional/test_search.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from . import utils
+
+
+def test_search_origins(client):
+ # Search with a page size of 1: exactly one origin node is expected, and
+ # pageInfo must report that a further page is available
+ query_str = """
+ {
+ search(query: "fox", first: 1) {
+ nodes {
+ type
+ }
+ pageInfo {
+ hasNextPage
+ endCursor
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str)
+ assert len(data["search"]["nodes"]) == 1
+ assert data == {
+ "search": {
+ "nodes": [{"type": "origin"}],
+ "pageInfo": {"endCursor": "MQ==", "hasNextPage": True},
+ }
+ }
+
+
+def test_search_missing_url(client):
+ # A query string expected to match no origin must yield an empty node list
+ query_str = """
+ {
+ search(query: "missing-fox", first: 1) {
+ nodes {
+ type
+ }
+ pageInfo {
+ hasNextPage
+ endCursor
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str)
+ assert len(data["search"]["nodes"]) == 0

File Metadata

Mime Type
text/plain
Expires
Wed, Sep 17, 4:56 PM (21 h, 54 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226666

Event Timeline