Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F11023681
D8260.id29899.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
D8260.id29899.diff
View Options
diff --git a/config/dev.yml b/config/dev.yml
--- a/config/dev.yml
+++ b/config/dev.yml
@@ -2,6 +2,10 @@
cls: remote
url: http://moma.internal.softwareheritage.org:5002
+search:
+ cls: remote
+ url: http://moma.internal.softwareheritage.org:5010
+
debug: yes
server-type: asgi
diff --git a/config/staging.yml b/config/staging.yml
--- a/config/staging.yml
+++ b/config/staging.yml
@@ -2,6 +2,10 @@
cls: remote
url: http://webapp.internal.staging.swh.network:5002
+search:
+ cls: remote
+ url: http://webapp.internal.staging.swh.network:5010
+
debug: yes
server-type: wsgi
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,3 +2,4 @@
swh.core[http] >= 0.3 # [http] is required by swh.core.pytest_plugin
swh.storage
swh.model
+swh.search
diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py
--- a/swh/graphql/backends/archive.py
+++ b/swh/graphql/backends/archive.py
@@ -14,15 +14,8 @@
def get_origin(self, url):
return self.storage.origin_get([url])[0]
- def get_origins(self, after=None, first=50, url_pattern=None):
- # STORAGE-TODO
- # Make them a single function in the backend
- if url_pattern is None:
- return self.storage.origin_list(page_token=after, limit=first)
-
- return self.storage.origin_search(
- url_pattern=url_pattern, page_token=after, limit=first
- )
+ def get_origins(self, after=None, first=50):
+ return self.storage.origin_list(page_token=after, limit=first)
def get_origin_visits(self, origin_url, after=None, first=50):
return self.storage.origin_visit_get(origin_url, page_token=after, limit=first)
diff --git a/swh/graphql/backends/search.py b/swh/graphql/backends/search.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/backends/search.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.graphql import server
+
+
+class Search:
+    """Backend wrapper around the swh-search client used by the GraphQL server"""
+
+    def __init__(self):
+        self.search = server.get_search()
+
+    def get_origins(self, query: str, after=None, first=50):
+        """Return one page of origins whose URL matches the given query"""
+        search_params = {"url_pattern": query, "page_token": after, "limit": first}
+        return self.search.origin_search(**search_params)
diff --git a/swh/graphql/resolvers/origin.py b/swh/graphql/resolvers/origin.py
--- a/swh/graphql/resolvers/origin.py
+++ b/swh/graphql/resolvers/origin.py
@@ -3,14 +3,23 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.graphql.backends import archive
+from swh.graphql.backends import archive, search
+from swh.model.model import Origin
from swh.storage.interface import PagedResult
from .base_connection import BaseConnection
from .base_node import BaseSWHNode
+from .search import SearchResultNode
-class OriginNode(BaseSWHNode):
+class BaseOriginNode(BaseSWHNode):
+    """
+    Base class shared by the origin node resolvers
+    """
+
+    def is_type_of(self):
+        """
+        Return the GraphQL type name of this node
+
+        Only required when the node is resolved as part of a UNION type,
+        where ariadne uses it to return the right type.
+        """
+        return "Origin"
+
+
+class OriginNode(BaseOriginNode):
"""
Node resolver for an origin requested directly with its URL
"""
@@ -19,16 +28,40 @@
return archive.Archive().get_origin(self.kwargs.get("url"))
+class TargetOriginNode(BaseOriginNode):
+    """
+    Node resolver for an origin reached as the target of a search result
+    """
+
+    obj: SearchResultNode
+
+    def _get_node_data(self):
+        # SearchResultNode exposes the origin URL under the name target_hash,
+        # to stay consistent with the other target types
+        origin_url = self.obj.target_hash
+
+        # No explicit check in the archive: the URL comes from the search
+        # index, which is assumed to be in sync with the archive, so the
+        # origin is expected to exist there
+        return Origin(origin_url)
+
+
class OriginConnection(BaseConnection):
"""
Connection resolver for the origins
"""
- _node_class = OriginNode
+ _node_class = BaseOriginNode
def _get_paged_result(self) -> PagedResult:
+ # Use the search backend if a urlPattern is given
+ if self.kwargs.get("urlPattern"):
+ origins = search.Search().get_origins(
+ query=self.kwargs.get("urlPattern"),
+ after=self._get_after_arg(),
+ first=self._get_first_arg(),
+ )
+ results = [Origin(ori["url"]) for ori in origins.results]
+ return PagedResult(results=results, next_page_token=origins.next_page_token)
+ # Use the archive backend by default
return archive.Archive().get_origins(
- after=self._get_after_arg(),
- first=self._get_first_arg(),
- url_pattern=self.kwargs.get("urlPattern"),
+ after=self._get_after_arg(), first=self._get_first_arg()
)
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -6,7 +6,7 @@
from .content import ContentNode, HashContentNode, TargetContentNode
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection
-from .origin import OriginConnection, OriginNode
+from .origin import OriginConnection, OriginNode, TargetOriginNode
from .release import ReleaseNode, TargetReleaseNode
from .revision import (
LogRevisionConnection,
@@ -14,7 +14,7 @@
RevisionNode,
TargetRevisionNode,
)
-from .search import ResolveSwhidConnection
+from .search import ResolveSwhidConnection, SearchConnection
from .snapshot import (
OriginSnapshotConnection,
SnapshotNode,
@@ -52,6 +52,7 @@
"content-by-hash": HashContentNode,
"dir-entry-dir": TargetDirectoryNode,
"dir-entry-file": TargetContentNode,
+ "search-result-origin": TargetOriginNode,
"search-result-snapshot": TargetSnapshotNode,
"search-result-revision": TargetRevisionNode,
"search-result-release": TargetReleaseNode,
@@ -75,6 +76,7 @@
"revision-log": LogRevisionConnection,
"directory-entries": DirectoryEntryConnection,
"resolve-swhid": ResolveSwhidConnection,
+ "search": SearchConnection,
}
if resolver_type not in mapping:
raise AttributeError(f"Invalid connection type: {resolver_type}")
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -266,6 +266,14 @@
return resolver(obj, info, **kw)
+@query.field("search")
+def search_resolver(obj, info: GraphQLResolveInfo, **kw) -> rs.search.SearchConnection:
+    """
+    Resolver for the "search" field of the root query
+
+    Looks up the connection resolver registered under "search" and
+    instantiates it with the parent object, the resolve info and the field
+    arguments.
+    """
+    resolver = get_connection_resolver("search")
+    return resolver(obj, info, **kw)
+
+
# Any other type of resolver
diff --git a/swh/graphql/resolvers/search.py b/swh/graphql/resolvers/search.py
--- a/swh/graphql/resolvers/search.py
+++ b/swh/graphql/resolvers/search.py
@@ -3,7 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.graphql.backends import archive
+from swh.graphql.backends import archive, search
from swh.storage.interface import PagedResult
from .base_connection import BaseConnection
@@ -29,3 +29,22 @@
}
]
return PagedResult(results=results)
+
+
+class SearchConnection(BaseConnection):
+    """
+    Connection resolver for the search entry point
+    """
+
+    _node_class = SearchResultNode
+
+    def _get_paged_result(self) -> PagedResult:
+        page = search.Search().get_origins(
+            query=self.kwargs.get("query"),
+            after=self._get_after_arg(),
+            first=self._get_first_arg(),
+        )
+
+        # FIXME: the type is hard coded to origin for now, as it is the only
+        # searchable object. The URL is stored under the name 'target_hash'
+        # to be consistent with the other objects.
+        results = [
+            {"target_hash": origin["url"], "type": "origin"}
+            for origin in page.results
+        ]
+        return PagedResult(results=results, next_page_token=page.next_page_token)
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -921,6 +921,10 @@
target: SearchResultTarget
}
+enum SearchType {
+ origin
+}
+
"""
The query root of the GraphQL interface.
"""
@@ -1040,4 +1044,24 @@
"""
swhid: SWHID!
): SearchResultConnection!
+
+ """
+ Search in SWH
+ """
+ search(
+ """
+ String to search for
+ """
+ query: String!
+
+ """
+ Returns the first _n_ elements from the list
+ """
+ first: Int!
+
+ """
+ Returns the page after the cursor
+ """
+ after: String
+ ): SearchResultConnection!
}
diff --git a/swh/graphql/server.py b/swh/graphql/server.py
--- a/swh/graphql/server.py
+++ b/swh/graphql/server.py
@@ -7,19 +7,28 @@
from typing import Any, Dict, Optional
from swh.core import config
-from swh.storage import get_storage as get_swhstorage
+from swh.search import get_search as get_swh_search
+from swh.storage import get_storage as get_swh_storage
graphql_cfg = None
storage = None
+search = None
def get_storage():
global storage
if not storage:
- storage = get_swhstorage(**graphql_cfg["storage"])
+ storage = get_swh_storage(**graphql_cfg["storage"])
return storage
+def get_search():
+    """Return the search client shared through the module level global.
+
+    The client is built from the "search" section of the loaded configuration
+    the first time it is needed, then reused by the following calls.
+    """
+    global search
+    if search:
+        return search
+    search = get_swh_search(**graphql_cfg["search"])
+    return search
+
+
def load_and_check_config(config_path: Optional[str]) -> Dict[str, Any]:
"""Check the minimal configuration is set to run the api or raise an
error explanation.
diff --git a/swh/graphql/tests/conftest.py b/swh/graphql/tests/conftest.py
--- a/swh/graphql/tests/conftest.py
+++ b/swh/graphql/tests/conftest.py
@@ -10,14 +10,15 @@
from swh.graphql import server as app_server
from swh.graphql.app import schema
-from swh.storage import get_storage as get_swhstorage
+from swh.search import get_search as get_swh_search
+from swh.storage import get_storage as get_swh_storage
-from .data import populate_dummy_data
+from .data import populate_dummy_data, populate_search_data
@pytest.fixture
def storage():
- storage = get_swhstorage(cls="memory")
+ storage = get_swh_storage(cls="memory")
# set the global var to use the in-memory storage
app_server.storage = storage
# populate the in-memory storage
@@ -26,7 +27,18 @@
@pytest.fixture
-def test_app(storage):
+def search():
+    """In-memory search backend, populated and installed as the server global"""
+    memory_search = get_swh_search(cls="memory")
+    # set the global var to use the in-memory search
+    app_server.search = memory_search
+    memory_search.initialize()
+    # populate the in-memory search
+    populate_search_data(memory_search)
+    return memory_search
+
+
+@pytest.fixture
+def test_app(storage, search):
app = Flask(__name__)
@app.route("/", methods=["POST"])
diff --git a/swh/graphql/tests/data.py b/swh/graphql/tests/data.py
--- a/swh/graphql/tests/data.py
+++ b/swh/graphql/tests/data.py
@@ -12,6 +12,10 @@
method(objects)
+def populate_search_data(search):
+    """Index the URL of every test origin in the given search backend"""
+    documents = ({"url": origin.url} for origin in get_origins())
+    search.origin_update(documents)
+
+
def get_origins():
return swh_model_data.ORIGINS
diff --git a/swh/graphql/tests/functional/test_search.py b/swh/graphql/tests/functional/test_search.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/tests/functional/test_search.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from . import utils
+
+
+def test_search_origins(client):
+    """A matching query returns an origin result and paging information"""
+    query_str = """
+    {
+      search(query: "fox", first: 1) {
+        nodes {
+          type
+        }
+        pageInfo {
+          hasNextPage
+          endCursor
+        }
+      }
+    }
+    """
+    data, _ = utils.get_query_response(client, query_str)
+    expected_page_info = {"endCursor": "MQ==", "hasNextPage": True}
+    assert len(data["search"]["nodes"]) == 1
+    assert data == {
+        "search": {"nodes": [{"type": "origin"}], "pageInfo": expected_page_info}
+    }
+
+
+def test_search_missing_url(client):
+    """A query matching no origin returns an empty list of nodes"""
+    query_str = """
+    {
+      search(query: "missing-fox", first: 1) {
+        nodes {
+          type
+        }
+        pageInfo {
+          hasNextPage
+          endCursor
+        }
+      }
+    }
+    """
+    data, _ = utils.get_query_response(client, query_str)
+    assert data["search"]["nodes"] == []
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Sep 17, 4:56 PM (21 h, 54 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226666
Attached To
D8260: Integrate with swh-search and add a search entrypoint
Event Timeline
Log In to Comment