diff --git a/swh/search/cli.py b/swh/search/cli.py --- a/swh/search/cli.py +++ b/swh/search/cli.py @@ -114,17 +114,36 @@ @click.option("--host", default="0.0.0.0", help="Host to run the server") @click.option("--port", default=5010, type=click.INT, help="Binding port of the server") @click.option( - "--index-prefix", required=False, help="The prefix to add before the index names" + "--index-name", default="origin", required=False, help="The index name to use" +) +@click.option( + "--read-alias", + default="origin-read", + required=False, + help="The alias name used to search", +) +@click.option( + "--write-alias", + default="origin-write", + required=False, + help="The alias name used to index data", ) @click.option( "--debug/--nodebug", default=True, help="Indicates if the server should run in debug mode", ) -def rpc_server(config_path, host, port, index_prefix, debug): +def rpc_server(config_path, host, port, index_name, read_alias, write_alias, debug): """Starts a Software Heritage Indexer RPC HTTP server.""" from .api.server import app, load_and_check_config api_cfg = load_and_check_config(config_path, type="any") app.config.update(api_cfg) - app.run(host, port=int(port), index_prefix=index_prefix, debug=bool(debug)) + app.run( + host, + port=int(port), + index_name=index_name, + read_alias=read_alias, + write_alias=write_alias, + debug=bool(debug), + ) diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -59,14 +59,18 @@ class ElasticSearch: - def __init__(self, hosts: List[str], index_prefix=None): + def __init__( + self, + hosts: List[str], + index_name: str = "origin", + read_alias: str = "origin-read", + write_alias: str = "origin-write", + ): self._backend = Elasticsearch(hosts=hosts) - self.index_prefix = index_prefix - - self.origin_index = "origin" - if index_prefix: - self.origin_index = index_prefix + "_" + self.origin_index + self.index_name = index_name + self.read_alias = read_alias + self.write_alias = write_alias @timed def check(self): @@ -77,11 +81,20 @@ self._backend.indices.delete(index="*") def initialize(self) -> None: - """Declare Elasticsearch indices and mappings""" - if not self._backend.indices.exists(index=self.origin_index): - self._backend.indices.create(index=self.origin_index) + """Declare Elasticsearch indices, aliases and mappings""" + if not self._backend.indices.exists(index=self.index_name): + self._backend.indices.create(index=self.index_name) + + if not self._backend.indices.exists_alias(self.read_alias): + self._backend.indices.put_alias(index=self.index_name, name=self.read_alias) + + if not self._backend.indices.exists_alias(self.write_alias): + self._backend.indices.put_alias( + index=self.index_name, name=self.write_alias + ) + self._backend.indices.put_mapping( - index=self.origin_index, + index=self.index_name, body={ "date_detection": False, "properties": { @@ -122,7 +135,7 @@ @timed def flush(self) -> None: - self._backend.indices.refresh(index=self.origin_index) + self._backend.indices.refresh(index=self.write_alias) @timed def origin_update(self, documents: Iterable[OriginDict]) -> None: @@ -152,7 +165,7 @@ { "_op_type": "update", "_id": sha1, - "_index": self.origin_index, + "_index": self.write_alias, "scripted_upsert": True, "upsert": {**document, "sha1": sha1,}, "script": { @@ -165,7 +178,7 @@ ] indexed_count, errors = helpers.bulk( - self._backend, actions, index=self.origin_index + self._backend, actions, index=self.write_alias ) assert isinstance(errors, List) # Make mypy happy @@ -175,10 +188,10 @@ ) def origin_dump(self) -> Iterator[model.Origin]: - results = helpers.scan(self._backend, index=self.origin_index) + results = helpers.scan(self._backend, index=self.read_alias) for hit in results: yield self._backend.termvectors( - index=self.origin_index, id=hit["_id"], fields=["*"] + index=self.read_alias, id=hit["_id"], fields=["*"] ) @timed @@ -258,7 +271,7 @@ page_token_content[b"sha1"].decode("ascii"), ] - res = self._backend.search(index=self.origin_index, body=body, size=limit) + res = self._backend.search(index=self.read_alias, body=body, size=limit) hits = res["hits"]["hits"] diff --git a/swh/search/tests/conftest.py b/swh/search/tests/conftest.py --- a/swh/search/tests/conftest.py +++ b/swh/search/tests/conftest.py @@ -124,7 +124,11 @@ """ logger.debug("swh_search: elasticsearch_host: %s", elasticsearch_host) search = get_search( - "elasticsearch", hosts=[elasticsearch_host], index_prefix="test" + "elasticsearch", + hosts=[elasticsearch_host], + index_name="test", + read_alias="test-read", + write_alias="test-write", ) search.deinitialize() # To reset internal state from previous runs search.initialize() # install required index diff --git a/swh/search/tests/test_api_client.py b/swh/search/tests/test_api_client.py --- a/swh/search/tests/test_api_client.py +++ b/swh/search/tests/test_api_client.py @@ -23,7 +23,12 @@ self.config = { "search": { "cls": "elasticsearch", - "args": {"hosts": [self._elasticsearch_host], "index_prefix": "test"}, + "args": { + "hosts": [self._elasticsearch_host], + "index_name": "test", + "read_alias": "test-read", + "write_alias": "test-write", + }, } } self.app = app @@ -33,7 +38,11 @@ def reset(self): search = get_search( - "elasticsearch", hosts=[self._elasticsearch_host], index_prefix="test" + "elasticsearch", + hosts=[self._elasticsearch_host], + index_name="test", + read_alias="test-read", + write_alias="test-write", ) search.deinitialize() search.initialize() diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py --- a/swh/search/tests/test_cli.py +++ b/swh/search/tests/test_cli.py @@ -21,7 +21,9 @@ cls: elasticsearch hosts: - '%(elasticsearch_host)s' - index_prefix: test + index_name: test + read_alias: test-read + write_alias: test-write """ JOURNAL_OBJECTS_CONFIG_TEMPLATE = """ @@ -392,19 +394,37 @@ ) -def test__initialize__with_prefix(elasticsearch_host): - """Initializing the index with a prefix should create an _origin index""" +def test__initialize__with_index_name(elasticsearch_host): + """Initializing the index with an index name should create the right index""" + + search = get_search("elasticsearch", hosts=[elasticsearch_host], index_name="test") + + assert search.index_name == "test" + assert search.read_alias == "origin-read" + assert search.write_alias == "origin-write" + + +def test__initialize__with_read_alias(elasticsearch_host): + """Initializing the index with a search alias name should create + the right search alias""" search = get_search( - "elasticsearch", hosts=[elasticsearch_host], index_prefix="test" + "elasticsearch", hosts=[elasticsearch_host], read_alias="test-read" ) - assert search.origin_index == "test_origin" + assert search.index_name == "origin" + assert search.read_alias == "test-read" + assert search.write_alias == "origin-write" -def test__initialize__without_prefix(elasticsearch_host): - """Initializing the index without a prefix should create an origin index""" +def test__initialize__with_write_alias(elasticsearch_host): + """Initializing the index with an indexing alias name should create + the right indexing alias""" - search = get_search("elasticsearch", hosts=[elasticsearch_host]) + search = get_search( + "elasticsearch", hosts=[elasticsearch_host], write_alias="test-write" + ) - assert search.origin_index == "origin" + assert search.index_name == "origin" + assert search.read_alias == "origin-read" + assert search.write_alias == "test-write" diff --git a/swh/search/tests/test_elasticsearch.py b/swh/search/tests/test_elasticsearch.py --- a/swh/search/tests/test_elasticsearch.py +++ b/swh/search/tests/test_elasticsearch.py @@ -69,3 +69,19 @@ "operation": "index_error", }, ) + + def test_write_alias_usage(self): + mock = self.mocker.patch("elasticsearch.helpers.bulk") + mock.return_value = 2, ["result"] + + self.search.origin_update([{"url": "http://foobar.baz"}]) + + assert mock.call_args[1]["index"] == "test-write" + + def test_read_alias_usage(self): + mock = self.mocker.patch("elasticsearch.Elasticsearch.search") + mock.return_value = {"hits": {"hits": []}} + + self.search.origin_search(url_pattern="foobar.baz") + + assert mock.call_args[1]["index"] == "test-read" diff --git a/swh/search/tests/test_init.py b/swh/search/tests/test_init.py --- a/swh/search/tests/test_init.py +++ b/swh/search/tests/test_init.py @@ -15,7 +15,7 @@ SEARCH_IMPLEMENTATIONS_KWARGS = [ ("remote", RemoteSearch, {"url": "localhost"}), - ("elasticsearch", ElasticSearch, {"hosts": ["localhost"], "index_prefix": "test"}), + ("elasticsearch", ElasticSearch, {"hosts": ["localhost"], "index_name": "test"}), ] SEARCH_IMPLEMENTATIONS = SEARCH_IMPLEMENTATIONS_KWARGS + [