Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_cli.py
Show All 10 Lines | |||||
from confluent_kafka import Producer | from confluent_kafka import Producer | ||||
from click.testing import CliRunner | from click.testing import CliRunner | ||||
from swh.journal.serializers import value_to_kafka | from swh.journal.serializers import value_to_kafka | ||||
from swh.search.cli import cli | from swh.search.cli import cli | ||||
from swh.search.tests.utils import assert_page_match | |||||
CLI_CONFIG = """ | CLI_CONFIG = """ | ||||
search: | search: | ||||
cls: elasticsearch | cls: elasticsearch | ||||
args: | args: | ||||
hosts: | hosts: | ||||
- '{elasticsearch_host}' | - '{elasticsearch_host}' | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | ): | ||||
# Check the output | # Check the output | ||||
expected_output = "Processed 1 messages.\nDone.\n" | expected_output = "Processed 1 messages.\nDone.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
swh_search.flush() | swh_search.flush() | ||||
# searching origin without visit as requirement | # searching origin without visit as requirement | ||||
results = swh_search.origin_search(url_pattern="foobar") | actual_page = swh_search.origin_search(url_pattern="foobar") | ||||
# We find it | # We find it | ||||
assert results == { | assert_page_match(actual_page, [{"url": "http://foobar.baz"}]) | ||||
"next_page_token": None, | |||||
"results": [{"url": "http://foobar.baz"}], | |||||
} | |||||
# It's an origin with no visit, searching for it with visit | # It's an origin with no visit, searching for it with visit | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | ||||
# returns nothing | # returns nothing | ||||
assert results == {"next_page_token": None, "results": []} | assert_page_match(actual_page, []) | ||||
def test__journal_client__origin_visit( | def test__journal_client__origin_visit( | ||||
swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | ||||
): | ): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
origin_foobar = {"url": "http://baz.foobar"} | |||||
producer = Producer( | producer = Producer( | ||||
{ | { | ||||
"bootstrap.servers": kafka_server, | "bootstrap.servers": kafka_server, | ||||
"client.id": "test search origin visit producer", | "client.id": "test search origin visit producer", | ||||
"acks": "all", | "acks": "all", | ||||
} | } | ||||
) | ) | ||||
topic = f"{kafka_prefix}.origin_visit" | topic = f"{kafka_prefix}.origin_visit" | ||||
value = value_to_kafka({"origin": "http://baz.foobar",}) | value = value_to_kafka({"origin": origin_foobar["url"]}) | ||||
producer.produce(topic=topic, key=b"bogus-origin-visit", value=value) | producer.produce(topic=topic, key=b"bogus-origin-visit", value=value) | ||||
journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | ||||
broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | ||||
) | ) | ||||
result = invoke( | result = invoke( | ||||
False, | False, | ||||
["journal-client", "objects", "--stop-after-objects", "1",], | ["journal-client", "objects", "--stop-after-objects", "1",], | ||||
journal_objects_config, | journal_objects_config, | ||||
elasticsearch_host=elasticsearch_host, | elasticsearch_host=elasticsearch_host, | ||||
) | ) | ||||
# Check the output | # Check the output | ||||
expected_output = "Processed 1 messages.\nDone.\n" | expected_output = "Processed 1 messages.\nDone.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
swh_search.flush() | swh_search.flush() | ||||
expected_result = { | |||||
"next_page_token": None, | |||||
"results": [{"url": "http://baz.foobar"}], | |||||
} | |||||
# Both search returns the visit | # Both search returns the visit | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=False) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=False) | ||||
assert results == expected_result | assert_page_match(actual_page, [origin_foobar]) | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=True) | |||||
assert results == expected_result | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | ||||
assert_page_match(actual_page, [origin_foobar]) | |||||
def test__journal_client__missing_main_journal_config_key(elasticsearch_host): | def test__journal_client__missing_main_journal_config_key(elasticsearch_host): | ||||
"""Missing configuration on journal should raise""" | """Missing configuration on journal should raise""" | ||||
with pytest.raises(KeyError, match="journal"): | with pytest.raises(KeyError, match="journal"): | ||||
invoke( | invoke( | ||||
catch_exceptions=False, | catch_exceptions=False, | ||||
args=["journal-client", "objects", "--stop-after-objects", "1",], | args=["journal-client", "objects", "--stop-after-objects", "1",], | ||||
Show All 26 Lines |