Make the Elasticsearch index name configurable.

The ElasticSearch backend takes an "index" argument (default: "origin", the
name that used to be hard-coded).  "swh search rpc-serve --index NAME"
overrides the value from the configuration file for that backend.

diff --git a/swh/search/cli.py b/swh/search/cli.py
--- a/swh/search/cli.py
+++ b/swh/search/cli.py
@@ -113,15 +113,22 @@
 @click.argument("config-path", required=True)
 @click.option("--host", default="0.0.0.0", help="Host to run the server")
 @click.option("--port", default=5010, type=click.INT, help="Binding port of the server")
+@click.option("--index", default=None, help="Elasticsearch index to use")
 @click.option(
     "--debug/--nodebug",
     default=True,
     help="Indicates if the server should run in debug mode",
 )
-def rpc_server(config_path, host, port, debug):
+def rpc_server(config_path, host, port, index, debug):
     """Starts a Software Heritage Indexer RPC HTTP server."""
     from .api.server import app, load_and_check_config
 
     api_cfg = load_and_check_config(config_path, type="any")
+    search_cfg = api_cfg.get("search", {})
+    if index is not None and search_cfg.get("cls") == "elasticsearch":
+        # app.run() forwards unknown keyword arguments to werkzeug's
+        # run_simple(), which rejects them; the index must reach the search
+        # backend through its constructor arguments instead.
+        search_cfg.setdefault("args", {})["index"] = index
     app.config.update(api_cfg)
     app.run(host, port=int(port), debug=bool(debug))
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -59,8 +59,15 @@
 
 
 class ElasticSearch:
-    def __init__(self, hosts: List[str]):
+    def __init__(self, hosts: List[str], index: str = "origin"):
+        """Create the Elasticsearch client for ``hosts``.
+
+        ``index`` names the Elasticsearch index used by ``initialize``,
+        ``flush`` and the ``origin_*`` methods; it defaults to "origin", the
+        name that was previously hard-coded.
+        """
         self._backend = Elasticsearch(hosts=hosts)
+        self.index = index
 
     def check(self):
         return self._backend.ping()
@@ -71,10 +78,10 @@
 
     def initialize(self) -> None:
         """Declare Elasticsearch indices and mappings"""
-        if not self._backend.indices.exists(index="origin"):
-            self._backend.indices.create(index="origin")
+        if not self._backend.indices.exists(index=self.index):
+            self._backend.indices.create(index=self.index)
         self._backend.indices.put_mapping(
-            index="origin",
+            index=self.index,
             body={
                 "properties": {
                     # sha1 of the URL; used as the document id
@@ -112,7 +119,7 @@
         )
 
     def flush(self) -> None:
-        self._backend.indices.refresh(index="_all")
+        self._backend.indices.refresh(index=self.index)
 
     def origin_update(self, documents: Iterable[Dict]) -> None:
         documents = map(_sanitize_origin, documents)
@@ -123,18 +130,20 @@
             {
                 "_op_type": "update",
                 "_id": sha1,
-                "_index": "origin",
+                "_index": self.index,
                 "doc": {**document, "sha1": sha1,},
                 "doc_as_upsert": True,
             }
             for (sha1, document) in documents_with_sha1
         ]
-        bulk(self._backend, actions, index="origin")
+        bulk(self._backend, actions, index=self.index)
 
     def origin_dump(self) -> Iterator[model.Origin]:
-        results = scan(self._backend, index="*")
+        results = scan(self._backend, index=self.index)
         for hit in results:
-            yield self._backend.termvectors(index="origin", id=hit["_id"], fields=["*"])
+            yield self._backend.termvectors(
+                index=self.index, id=hit["_id"], fields=["*"]
+            )
 
     def origin_search(
         self,
@@ -208,7 +217,7 @@
                 page_token_content[b"sha1"].decode("ascii"),
             ]
 
-        res = self._backend.search(index="origin", body=body, size=limit)
+        res = self._backend.search(index=self.index, body=body, size=limit)
 
         hits = res["hits"]["hits"]
 
diff --git a/swh/search/tests/conftest.py b/swh/search/tests/conftest.py
--- a/swh/search/tests/conftest.py
+++ b/swh/search/tests/conftest.py
@@ -123,7 +123,9 @@
 
     """
     logger.debug("swh_search: elasticsearch_host: %s", elasticsearch_host)
-    search = get_search("elasticsearch", hosts=[elasticsearch_host],)
+    search = get_search(
+        "elasticsearch", hosts=[elasticsearch_host], index="origin-test"
+    )
     search.deinitialize()  # To reset internal state from previous runs
     search.initialize()  # install required index
     yield search
diff --git a/swh/search/tests/test_api_client.py b/swh/search/tests/test_api_client.py
--- a/swh/search/tests/test_api_client.py
+++ b/swh/search/tests/test_api_client.py
@@ -23,7 +23,7 @@
         self.config = {
             "search": {
                 "cls": "elasticsearch",
-                "args": {"hosts": [self._elasticsearch_host],},
+                "args": {"hosts": [self._elasticsearch_host], "index": "origin-test"},
             }
         }
         self.app = app
@@ -32,7 +32,9 @@
         self.search = get_search("remote", url=self.url(),)
 
     def reset(self):
-        search = get_search("elasticsearch", hosts=[self._elasticsearch_host],)
+        search = get_search(
+            "elasticsearch", hosts=[self._elasticsearch_host], index="origin-test"
+        )
         search.deinitialize()
         search.initialize()
 
diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py
--- a/swh/search/tests/test_cli.py
+++ b/swh/search/tests/test_cli.py
@@ -21,6 +21,7 @@
     args:
         hosts:
         - '%(elasticsearch_host)s'
+        index: origin-test
 """
 
 JOURNAL_OBJECTS_CONFIG_TEMPLATE = """
diff --git a/swh/search/tests/test_init.py b/swh/search/tests/test_init.py
--- a/swh/search/tests/test_init.py
+++ b/swh/search/tests/test_init.py
@@ -15,7 +15,7 @@
 
 SEARCH_IMPLEMENTATIONS_KWARGS = [
     ("remote", RemoteSearch, {"url": "localhost"}),
-    ("elasticsearch", ElasticSearch, {"hosts": ["localhost"]}),
+    ("elasticsearch", ElasticSearch, {"hosts": ["localhost"], "index": "origin-test"}),
 ]
 
 SEARCH_IMPLEMENTATIONS = SEARCH_IMPLEMENTATIONS_KWARGS + [