diff --git a/swh/search/api/server.py b/swh/search/api/server.py
index aa08b75..bead475 100644
--- a/swh/search/api/server.py
+++ b/swh/search/api/server.py
@@ -1,106 +1,109 @@
 # Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import logging
 import os
 from typing import Any, Dict
 
 from swh.core import config
 from swh.core.api import RPCServerApp
 from swh.core.api import encode_data_server as encode_data
 from swh.core.api import error_handler
 from swh.search.metrics import timed
 
 from .. import get_search
 from ..exc import SearchException
 from ..interface import SearchInterface
 
 logger = logging.getLogger(__name__)
 
 
 def _get_search():
     global search
     if not search:
         search = get_search(**app.config["search"])
 
     return search
 
 
 app = RPCServerApp(__name__, backend_class=SearchInterface, backend_factory=_get_search)
 
 search = None
 
 
 @app.errorhandler(SearchException)
 def search_error_handler(exception):
     return error_handler(exception, encode_data, status_code=400)
 
 
 @app.errorhandler(Exception)
 def my_error_handler(exception):
     return error_handler(exception, encode_data)
 
 
 @app.route("/")
 @timed
 def index():
     return "SWH Search API server"
 
 
 @app.before_first_request
 def initialized_index():
     search = _get_search()
-    logger.info("Initializing indexes with configuration: ", search.origin_config)
+    # origin_config is only read for the Elasticsearch backend; other
+    # backends (e.g. "memory") skip this log line.
+    if app.config["search"]["cls"] == "elasticsearch":
+        logger.info("Initializing indexes with configuration: %s", search.origin_config)
     search.initialize()
 
 
 api_cfg = None
 
 
 def load_and_check_config(config_file: str) -> Dict[str, Any]:
     """Check the minimal configuration is set to run the api or raise an
     error explanation.
 
     Args:
         config_file: Path to the configuration file to load
         type: configuration type. For 'local' type, more
             checks are done.
 
     Raises:
         Error if the setup is not as expected
 
     Returns:
         configuration as a dict
 
     """
     if not config_file:
         raise EnvironmentError("Configuration file must be defined")
 
     if not os.path.exists(config_file):
         raise FileNotFoundError("Configuration file %s does not exist" % (config_file,))
 
     cfg = config.read(config_file)
     if "search" not in cfg:
         raise KeyError("Missing 'search' configuration")
 
     return cfg
 
 
 def make_app_from_configfile():
     """Run the WSGI app from the webserver, loading the configuration from
     a configuration file.
 
     SWH_CONFIG_FILENAME environment variable defines the
     configuration path to load.
 
     """
     global api_cfg
     if not api_cfg:
         config_file = os.environ.get("SWH_CONFIG_FILENAME")
         api_cfg = load_and_check_config(config_file)
         app.config.update(api_cfg)
     handler = logging.StreamHandler()
     app.logger.addHandler(handler)
     return app
diff --git a/swh/search/tests/test_api_client.py b/swh/search/tests/test_api_client.py
index 68f01e1..012d52f 100644
--- a/swh/search/tests/test_api_client.py
+++ b/swh/search/tests/test_api_client.py
@@ -1,67 +1,90 @@
 # Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
 import pytest
 
 from swh.core.api.tests.server_testing import ServerTestFixture
 from swh.search import get_search
 from swh.search.api.server import app
 
 from .test_elasticsearch import CommonElasticsearchSearchTest
+from .test_in_memory import CommonInmemorySearchTest
 
 
-class TestRemoteSearch(
+class TestRemoteSearchElasticSearch(
     CommonElasticsearchSearchTest, ServerTestFixture, unittest.TestCase
 ):
     @pytest.fixture(autouse=True)
     def _instantiate_search(self, elasticsearch_host):
         self._elasticsearch_host = elasticsearch_host
 
     def setUp(self):
         self.config = {
             "search": {
                 "cls": "elasticsearch",
                 "args": {
                     "hosts": [self._elasticsearch_host],
                     "indexes": {
                         "origin": {
                             "index": "test",
                             "read_alias": "test-read",
                             "write_alias": "test-write",
                         }
                     },
                 },
             }
         }
         self.app = app
         super().setUp()
         self.reset()
         self.search = get_search(
             "remote",
             url=self.url(),
         )
 
     def reset(self):
         search = get_search(
             "elasticsearch",
             hosts=[self._elasticsearch_host],
             indexes={
                 "origin": {
                     "index": "test",
                     "read_alias": "test-read",
                     "write_alias": "test-write",
                 }
             },
         )
         search.deinitialize()
         search.initialize()
 
     @pytest.mark.skip(
         "Elasticsearch also returns close matches, so this test would fail"
     )
     def test_origin_url_paging(self, count):
         pass
+
+
+class TestRemoteSearchInMemory(
+    CommonInmemorySearchTest, ServerTestFixture, unittest.TestCase
+):
+    """Run the in-memory search tests through the RPC server and client."""
+
+    def setUp(self):
+        self.config = {
+            "search": {
+                "cls": "memory",
+            }
+        }
+        self.app = app
+        super().setUp()
+        self.search = get_search(
+            "remote",
+            url=self.url(),
+        )
+
+    def reset(self):
+        pass
diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py
index fbadd0d..d859c38 100644
--- a/swh/search/tests/test_in_memory.py
+++ b/swh/search/tests/test_in_memory.py
@@ -1,72 +1,76 @@
 # Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
 import pytest
 
 from swh.indexer import codemeta
 from swh.search import get_search
 from swh.search.in_memory import _nested_get
 
 from .test_search import CommonSearchTest
 
 
-class InmemorySearchTest(unittest.TestCase, CommonSearchTest):
+class CommonInmemorySearchTest(CommonSearchTest):
+    """Search tests bound to the "memory" backend, reusable by other suites."""
+
     @pytest.fixture(autouse=True)
     def _instantiate_search(self):
         self.search = get_search("memory")
 
+
+class InmemorySearchTest(CommonInmemorySearchTest, unittest.TestCase):
     def setUp(self):
         self.reset()
 
     def reset(self):
         self.search.deinitialize()
         self.search.initialize()
 
 
 def test_nested_get_helper_function():
     instrinsic_metadata = codemeta.expand(
         {
             "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
             "keywords": ["bar", "baz"],
             "description": "foo bar 3",
             "programmingLanguage": "cpp",
             "license": "https://spdx.org/licenses/LGPL-2.0-only",
         }
     )
     expected_expansion = [
         {
             "http://schema.org/description": [{"@value": "foo bar 3"}],
             "http://schema.org/license": [
                 {"@id": "https://spdx.org/licenses/LGPL-2.0-only"}
             ],
             "http://schema.org/keywords": [{"@value": "bar"}, {"@value": "baz"}],
             "http://schema.org/programmingLanguage": [{"@value": "cpp"}],
         }
     ]
     assert instrinsic_metadata == expected_expansion
 
     assert _nested_get(instrinsic_metadata, ["http://schema.org/license", "@id"]) == [
         "https://spdx.org/licenses/LGPL-2.0-only"
     ]
 
     new_field = [
         {"name": [{"@value": {"first": "f1", "last": "l1"}}], "address": "XYZ"},
         {"name": [{"@value": {"first": "f2", "last": "l2"}}], "address": "ABC"},
         {"name": [{"@value": {"first": "f3"}}], "address": {}},
         {"name": [{"@value": {"first": "f4"}}], "address": []},
     ]
     assert _nested_get(new_field, ["name", "@value", "last"]) == ["l1", "l2", "", ""]
     assert _nested_get(new_field, ["name", "@value", "first"]) == [
         "f1",
         "f2",
         "f3",
         "f4",
     ]
     assert _nested_get(new_field, ["address"]) == ["XYZ", "ABC", {}, []]
 
     # shouldn't allow fetching intermediate values
     assert _nested_get(new_field, ["name", "@value"]) == ["", "", "", ""]