diff --git a/requirements-test.txt b/requirements-test.txt index 90fac90..2576dc1 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,3 @@ pytest +pytest-mock confluent-kafka diff --git a/swh/search/__init__.py b/swh/search/__init__.py index d66b553..f4cb97c 100644 --- a/swh/search/__init__.py +++ b/swh/search/__init__.py @@ -1,32 +1,51 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import importlib +import warnings -def get_search(cls, args): - """Get an search object of class `search_class` with - arguments `search_args`. +from typing import Any, Dict + + +SEARCH_IMPLEMENTATIONS = { + "elasticsearch": ".elasticsearch.ElasticSearch", + "remote": ".api.client.RemoteSearch", + "memory": ".in_memory.InMemorySearch", +} + + +def get_search(cls: str, **kwargs: Dict[str, Any]): + """Get a search object of class `cls` with arguments `kwargs`. Args: - cls (str): search's class, either 'local' or 'remote' - args (dict): dictionary of arguments passed to the + cls: search's class, one of 'elasticsearch', 'remote' or 'memory' + kwargs: keyword arguments passed to the search class constructor Returns: an instance of swh.search's classes (either local or remote) Raises: ValueError if passed an unknown search class. 
""" - if cls == "remote": - from .api.client import RemoteSearch as Search - elif cls == "elasticsearch": - from .elasticsearch import ElasticSearch as Search - elif cls == "memory": - from .in_memory import InMemorySearch as Search - else: - raise ValueError("Unknown indexer search class `%s`" % cls) - - return Search(**args) + if "args" in kwargs: + warnings.warn( + 'Explicit "args" key is deprecated, use keys directly instead.', + DeprecationWarning, + ) + kwargs = kwargs["args"] + + class_path = SEARCH_IMPLEMENTATIONS.get(cls) + if class_path is None: + raise ValueError( + "Unknown search class `%s`. Supported: %s" + % (cls, ", ".join(SEARCH_IMPLEMENTATIONS)) + ) + + (module_path, class_name) = class_path.rsplit(".", 1) + module = importlib.import_module(module_path, package=__package__) + Search = getattr(module, class_name) + return Search(**kwargs) diff --git a/swh/search/tests/conftest.py b/swh/search/tests/conftest.py index 1859fc2..5c9f5b9 100644 --- a/swh/search/tests/conftest.py +++ b/swh/search/tests/conftest.py @@ -1,130 +1,130 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import socket import subprocess import time import elasticsearch import pytest from swh.search import get_search logger = logging.getLogger(__name__) def free_port(): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.bind(("127.0.0.1", 0)) port = sock.getsockname()[1] sock.close() return port def wait_for_peer(addr, port): while True: try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((addr, port)) except ConnectionRefusedError: time.sleep(0.1) else: sock.close() break CONFIG_TEMPLATE = """ node.name: node-1 path.data: {data} path.logs: {logs} network.host: 127.0.0.1 http.port: {http_port} transport.port: {transport_port} """ def 
_run_elasticsearch(conf_dir, data_dir, logs_dir, http_port, transport_port): es_home = "/usr/share/elasticsearch" with open(conf_dir + "/elasticsearch.yml", "w") as fd: fd.write( CONFIG_TEMPLATE.format( data=data_dir, logs=logs_dir, http_port=http_port, transport_port=transport_port, ) ) with open(conf_dir + "/log4j2.properties", "w") as fd: pass cmd = [ "/usr/share/elasticsearch/jdk/bin/java", "-Des.path.home={}".format(es_home), "-Des.path.conf={}".format(conf_dir), "-Des.bundled_jdk=true", "-Dlog4j2.disable.jmx=true", "-cp", "{}/lib/*".format(es_home), "org.elasticsearch.bootstrap.Elasticsearch", ] host = "127.0.0.1:{}".format(http_port) with open(logs_dir + "/output.txt", "w") as fd: p = subprocess.Popen(cmd) wait_for_peer("127.0.0.1", http_port) client = elasticsearch.Elasticsearch([host]) assert client.ping() return p @pytest.fixture(scope="session") def elasticsearch_session(tmpdir_factory): tmpdir = tmpdir_factory.mktemp("elasticsearch") es_conf = tmpdir.mkdir("conf") http_port = free_port() transport_port = free_port() p = _run_elasticsearch( conf_dir=str(es_conf), data_dir=str(tmpdir.mkdir("data")), logs_dir=str(tmpdir.mkdir("logs")), http_port=http_port, transport_port=transport_port, ) yield "127.0.0.1:{}".format(http_port) # Check ES didn't stop assert p.returncode is None, p.returncode p.kill() p.wait() @pytest.fixture(scope="class") def elasticsearch_host(elasticsearch_session): yield elasticsearch_session @pytest.fixture def swh_search(elasticsearch_host): """Instantiate a search client, initialize the elasticsearch instance, and returns it """ logger.debug("swh_search: elasticsearch_host: %s", elasticsearch_host) - search = get_search("elasticsearch", {"hosts": [elasticsearch_host],}) + search = get_search("elasticsearch", hosts=[elasticsearch_host],) search.deinitialize() # To reset internal state from previous runs search.initialize() # install required index yield search diff --git a/swh/search/tests/test_api_client.py 
b/swh/search/tests/test_api_client.py index ad6b4d0..c766e5e 100644 --- a/swh/search/tests/test_api_client.py +++ b/swh/search/tests/test_api_client.py @@ -1,43 +1,43 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pytest from swh.core.api.tests.server_testing import ServerTestFixture from swh.search import get_search from swh.search.api.server import app from .test_search import CommonSearchTest class TestRemoteSearch(CommonSearchTest, ServerTestFixture, unittest.TestCase): @pytest.fixture(autouse=True) def _instantiate_search(self, elasticsearch_host): self._elasticsearch_host = elasticsearch_host def setUp(self): self.config = { "search": { "cls": "elasticsearch", "args": {"hosts": [self._elasticsearch_host],}, } } self.app = app super().setUp() self.reset() - self.search = get_search("remote", {"url": self.url(),}) + self.search = get_search("remote", url=self.url(),) def reset(self): - search = get_search("elasticsearch", {"hosts": [self._elasticsearch_host],}) + search = get_search("elasticsearch", hosts=[self._elasticsearch_host],) search.deinitialize() search.initialize() @pytest.mark.skip( "Elasticsearch also returns close matches, so this test would fail" ) def test_origin_url_paging(self, count): pass diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py index f48eb02..427f4c3 100644 --- a/swh/search/tests/test_in_memory.py +++ b/swh/search/tests/test_in_memory.py @@ -1,40 +1,40 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See 
top-level LICENSE file for more information import unittest import pytest from swh.search import get_search from .test_search import CommonSearchTest class InmemorySearchTest(unittest.TestCase, CommonSearchTest): @pytest.fixture(autouse=True) def _instantiate_search(self): - self.search = get_search("memory", {}) + self.search = get_search("memory") def setUp(self): self.reset() def reset(self): self.search.deinitialize() self.search.initialize() @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_description(self): pass @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_all_terms(self): pass @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_nested(self): pass @pytest.mark.skip("Not implemented in the in-memory search") def test_origin_intrinsic_metadata_paging(self): pass diff --git a/swh/search/tests/test_init.py b/swh/search/tests/test_init.py new file mode 100644 index 0000000..90451aa --- /dev/null +++ b/swh/search/tests/test_init.py @@ -0,0 +1,44 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + +from swh.search import get_search + +from swh.search.elasticsearch import ElasticSearch +from swh.search.api.client import RemoteSearch +from swh.search.in_memory import InMemorySearch + + +SEARCH_IMPLEMENTATIONS_KWARGS = [ + ("remote", RemoteSearch, {"url": "localhost"}), + ("elasticsearch", ElasticSearch, {"hosts": ["localhost"]}), +] + +SEARCH_IMPLEMENTATIONS = SEARCH_IMPLEMENTATIONS_KWARGS + [ + ("memory", InMemorySearch, None), +] + + +def test_get_search_failure(): + with pytest.raises(ValueError, match="Unknown search class"): + get_search("unknown-search") + + 
+@pytest.mark.parametrize("class_,expected_class,kwargs", SEARCH_IMPLEMENTATIONS) +def test_get_search(mocker, class_, expected_class, kwargs): + mocker.patch("swh.search.elasticsearch.Elasticsearch") + if kwargs: + concrete_search = get_search(class_, **kwargs) + else: + concrete_search = get_search(class_) + assert isinstance(concrete_search, expected_class) + + +@pytest.mark.parametrize("class_,expected_class,kwargs", SEARCH_IMPLEMENTATIONS_KWARGS) +def test_get_search_deprecation_warning(mocker, class_, expected_class, kwargs): + with pytest.warns(DeprecationWarning): + concrete_search = get_search(class_, args=kwargs) + assert isinstance(concrete_search, expected_class)