diff --git a/swh/search/cli.py b/swh/search/cli.py
--- a/swh/search/cli.py
+++ b/swh/search/cli.py
@@ -113,15 +113,24 @@
 @click.argument("config-path", required=True)
 @click.option("--host", default="0.0.0.0", help="Host to run the server")
 @click.option("--port", default=5010, type=click.INT, help="Binding port of the server")
+@click.option(
+    "--index-prefix", required=False, help="The prefix to add before the index names"
+)
 @click.option(
     "--debug/--nodebug",
     default=True,
     help="Indicates if the server should run in debug mode",
 )
-def rpc_server(config_path, host, port, debug):
+def rpc_server(config_path, host, port, index_prefix, debug):
     """Starts a Software Heritage Indexer RPC HTTP server."""
     from .api.server import app, load_and_check_config
 
     api_cfg = load_and_check_config(config_path, type="any")
+    if index_prefix:
+        # Flask.run() forwards extra keyword arguments to werkzeug's
+        # run_simple(), which rejects unknown ones, so the prefix is handed
+        # to the search backend through its configured "args" instead.
+        # NOTE(review): assumes the checked config has a "search" section.
+        api_cfg["search"].setdefault("args", {})["index_prefix"] = index_prefix
     app.config.update(api_cfg)
     app.run(host, port=int(port), debug=bool(debug))
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -59,8 +59,14 @@
 
 
 class ElasticSearch:
-    def __init__(self, hosts: List[str]):
+    def __init__(self, hosts: List[str], index_prefix=None):
         self._backend = Elasticsearch(hosts=hosts)
+        self.index_prefix = index_prefix
+
+        self.origin_index = "origin"
+
+        if index_prefix:
+            self.origin_index = index_prefix + "_" + self.origin_index
 
     def check(self):
         return self._backend.ping()
@@ -71,10 +77,10 @@
 
     def initialize(self) -> None:
         """Declare Elasticsearch indices and mappings"""
-        if not self._backend.indices.exists(index="origin"):
-            self._backend.indices.create(index="origin")
+        if not self._backend.indices.exists(index=self.origin_index):
+            self._backend.indices.create(index=self.origin_index)
         self._backend.indices.put_mapping(
-            index="origin",
+            index=self.origin_index,
             body={
                 "properties": {
                     # sha1 of the URL; used as the document id
@@ -112,7 +118,7 @@
         )
 
     def flush(self) -> None:
-        self._backend.indices.refresh(index="_all")
+        self._backend.indices.refresh(index=self.origin_index)
 
     def origin_update(self, documents: Iterable[Dict]) -> None:
         documents = map(_sanitize_origin, documents)
@@ -123,18 +129,20 @@
             {
                 "_op_type": "update",
                 "_id": sha1,
-                "_index": "origin",
+                "_index": self.origin_index,
                 "doc": {**document, "sha1": sha1,},
                 "doc_as_upsert": True,
             }
             for (sha1, document) in documents_with_sha1
         ]
-        bulk(self._backend, actions, index="origin")
+        bulk(self._backend, actions, index=self.origin_index)
 
     def origin_dump(self) -> Iterator[model.Origin]:
-        results = scan(self._backend, index="*")
+        results = scan(self._backend, index=self.origin_index)
         for hit in results:
-            yield self._backend.termvectors(index="origin", id=hit["_id"], fields=["*"])
+            yield self._backend.termvectors(
+                index=self.origin_index, id=hit["_id"], fields=["*"]
+            )
 
     def origin_search(
         self,
@@ -208,7 +216,7 @@
                 page_token_content[b"sha1"].decode("ascii"),
             ]
 
-        res = self._backend.search(index="origin", body=body, size=limit)
+        res = self._backend.search(index=self.origin_index, body=body, size=limit)
 
         hits = res["hits"]["hits"]
 
diff --git a/swh/search/tests/conftest.py b/swh/search/tests/conftest.py
--- a/swh/search/tests/conftest.py
+++ b/swh/search/tests/conftest.py
@@ -123,7 +123,9 @@
 
     """
     logger.debug("swh_search: elasticsearch_host: %s", elasticsearch_host)
-    search = get_search("elasticsearch", hosts=[elasticsearch_host],)
+    search = get_search(
+        "elasticsearch", hosts=[elasticsearch_host], index_prefix="test"
+    )
     search.deinitialize()  # To reset internal state from previous runs
     search.initialize()  # install required index
     yield search
diff --git a/swh/search/tests/test_api_client.py b/swh/search/tests/test_api_client.py
--- a/swh/search/tests/test_api_client.py
+++ b/swh/search/tests/test_api_client.py
@@ -23,7 +23,7 @@
         self.config = {
             "search": {
                 "cls": "elasticsearch",
-                "args": {"hosts": [self._elasticsearch_host],},
+                "args": {"hosts": [self._elasticsearch_host], "index_prefix": "test"},
             }
         }
         self.app = app
@@ -32,7 +32,9 @@
         self.search = get_search("remote", url=self.url(),)
 
     def reset(self):
-        search = get_search("elasticsearch", hosts=[self._elasticsearch_host],)
+        search = get_search(
+            "elasticsearch", hosts=[self._elasticsearch_host], index_prefix="test"
+        )
         search.deinitialize()
         search.initialize()
 
diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py
--- a/swh/search/tests/test_cli.py
+++ b/swh/search/tests/test_cli.py
@@ -13,6 +13,7 @@
 from swh.journal.serializers import value_to_kafka
 from swh.model.hashutil import hash_to_bytes
+from swh.search import get_search
 from swh.search.cli import search_cli_group
 
 
 CLI_CONFIG = """
@@ -21,6 +22,7 @@
     args:
         hosts:
         - '%(elasticsearch_host)s'
+        index_prefix: test
 """
 
 JOURNAL_OBJECTS_CONFIG_TEMPLATE = """
@@ -389,3 +391,21 @@
             journal_cfg,
             elasticsearch_host=elasticsearch_host,
         )
+
+
+def test__initialize__with_prefix(elasticsearch_host):
+    """With a prefix, the origin index name should be <prefix>_origin"""
+
+    search = get_search(
+        "elasticsearch", hosts=[elasticsearch_host], index_prefix="test"
+    )
+
+    assert search.origin_index == "test_origin"
+
+
+def test__initialize__without_prefix(elasticsearch_host):
+    """Without a prefix, the origin index name should be plain origin"""
+
+    search = get_search("elasticsearch", hosts=[elasticsearch_host])
+
+    assert search.origin_index == "origin"
diff --git a/swh/search/tests/test_init.py b/swh/search/tests/test_init.py
--- a/swh/search/tests/test_init.py
+++ b/swh/search/tests/test_init.py
@@ -15,7 +15,7 @@
 
 SEARCH_IMPLEMENTATIONS_KWARGS = [
     ("remote", RemoteSearch, {"url": "localhost"}),
-    ("elasticsearch", ElasticSearch, {"hosts": ["localhost"]}),
+    ("elasticsearch", ElasticSearch, {"hosts": ["localhost"], "index_prefix": "test"}),
 ]
 
 SEARCH_IMPLEMENTATIONS = SEARCH_IMPLEMENTATIONS_KWARGS + [