Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_cli.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
import tempfile | import tempfile | ||||
import yaml | import yaml | ||||
import pytest | import pytest | ||||
from confluent_kafka import Producer | from confluent_kafka import Producer | ||||
from click.testing import CliRunner | from click.testing import CliRunner | ||||
from swh.model.model import Origin | |||||
from swh.journal.serializers import value_to_kafka | from swh.journal.serializers import value_to_kafka | ||||
from swh.search.cli import cli | from swh.search.cli import cli | ||||
from swh.search.interface import PagedResult | |||||
CLI_CONFIG = """ | CLI_CONFIG = """ | ||||
search: | search: | ||||
cls: elasticsearch | cls: elasticsearch | ||||
args: | args: | ||||
hosts: | hosts: | ||||
- '{elasticsearch_host}' | - '{elasticsearch_host}' | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | ): | ||||
# Check the output | # Check the output | ||||
expected_output = "Processed 1 messages.\nDone.\n" | expected_output = "Processed 1 messages.\nDone.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
swh_search.flush() | swh_search.flush() | ||||
# searching origin without visit as requirement | # searching origin without visit as requirement | ||||
results = swh_search.origin_search(url_pattern="foobar") | actual_page = swh_search.origin_search(url_pattern="foobar") | ||||
# We find it | # We find it | ||||
assert results == { | assert actual_page.next_page_token is None | ||||
"next_page_token": None, | assert actual_page.results == [Origin(url="http://foobar.baz")] | ||||
"results": [{"url": "http://foobar.baz"}], | |||||
} | |||||
# It's an origin with no visit, searching for it with visit | # It's an origin with no visit, searching for it with visit | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | ||||
# returns nothing | # returns nothing | ||||
assert results == {"next_page_token": None, "results": []} | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [] | |||||
def test__journal_client__origin_visit( | def test__journal_client__origin_visit( | ||||
swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | ||||
): | ): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
producer = Producer( | producer = Producer( | ||||
Show All 19 Lines | ): | ||||
# Check the output | # Check the output | ||||
expected_output = "Processed 1 messages.\nDone.\n" | expected_output = "Processed 1 messages.\nDone.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
swh_search.flush() | swh_search.flush() | ||||
expected_result = { | expected_page = PagedResult( | ||||
"next_page_token": None, | next_page_token=None, results=[Origin(url="http://baz.foobar")], | ||||
"results": [{"url": "http://baz.foobar"}], | ) | ||||
} | |||||
# Both search returns the visit | # Both search returns the visit | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=False) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=False) | ||||
assert results == expected_result | assert actual_page == expected_page | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | ||||
assert results == expected_result | assert actual_page == expected_page | ||||
def test__journal_client__missing_main_journal_config_key(elasticsearch_host): | def test__journal_client__missing_main_journal_config_key(elasticsearch_host): | ||||
"""Missing configuration on journal should raise""" | """Missing configuration on journal should raise""" | ||||
with pytest.raises(KeyError, match="journal"): | with pytest.raises(KeyError, match="journal"): | ||||
invoke( | invoke( | ||||
catch_exceptions=False, | catch_exceptions=False, | ||||
args=["journal-client", "objects", "--stop-after-objects", "1",], | args=["journal-client", "objects", "--stop-after-objects", "1",], | ||||
Show All 26 Lines |