Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_cli.py
Show First 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | ): | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
producer = Producer( | producer = Producer( | ||||
{ | { | ||||
"bootstrap.servers": kafka_server, | "bootstrap.servers": kafka_server, | ||||
"client.id": "test search origin producer", | "client.id": "test search origin producer", | ||||
"acks": "all", | "acks": "all", | ||||
} | } | ||||
) | ) | ||||
value = value_to_kafka({"url": "http://foobar.baz",}) | origin_foobar_baz = { | ||||
"url": "http://foobar.baz", | |||||
} | |||||
value = value_to_kafka(origin_foobar_baz) | |||||
topic = f"{kafka_prefix}.origin" | topic = f"{kafka_prefix}.origin" | ||||
producer.produce(topic=topic, key=b"bogus-origin", value=value) | producer.produce(topic=topic, key=b"bogus-origin", value=value) | ||||
journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | ||||
broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | ||||
) | ) | ||||
result = invoke( | result = invoke( | ||||
False, | False, | ||||
["journal-client", "objects", "--stop-after-objects", "1",], | ["journal-client", "objects", "--stop-after-objects", "1",], | ||||
journal_objects_config, | journal_objects_config, | ||||
elasticsearch_host=elasticsearch_host, | elasticsearch_host=elasticsearch_host, | ||||
) | ) | ||||
# Check the output | # Check the output | ||||
expected_output = "Processed 1 messages.\nDone.\n" | expected_output = "Processed 1 messages.\nDone.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
swh_search.flush() | swh_search.flush() | ||||
# searching origin without visit as requirement | # searching origin without visit as requirement | ||||
results = swh_search.origin_search(url_pattern="foobar") | actual_page = swh_search.origin_search(url_pattern="foobar") | ||||
# We find it | # We find it | ||||
assert results == { | assert actual_page.next_page_token is None | ||||
"next_page_token": None, | assert actual_page.results == [origin_foobar_baz] | ||||
"results": [{"url": "http://foobar.baz"}], | |||||
} | |||||
# It's an origin with no visit, searching for it with visit | # It's an origin with no visit, searching for it with visit | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | ||||
# returns nothing | # returns nothing | ||||
assert results == {"next_page_token": None, "results": []} | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [] | |||||
def test__journal_client__origin_visit( | def test__journal_client__origin_visit( | ||||
swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | ||||
): | ): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
origin_foobar = {"url": "http://baz.foobar"} | |||||
producer = Producer( | producer = Producer( | ||||
{ | { | ||||
"bootstrap.servers": kafka_server, | "bootstrap.servers": kafka_server, | ||||
"client.id": "test search origin visit producer", | "client.id": "test search origin visit producer", | ||||
"acks": "all", | "acks": "all", | ||||
} | } | ||||
) | ) | ||||
topic = f"{kafka_prefix}.origin_visit" | topic = f"{kafka_prefix}.origin_visit" | ||||
value = value_to_kafka({"origin": "http://baz.foobar",}) | value = value_to_kafka({"origin": origin_foobar["url"]}) | ||||
producer.produce(topic=topic, key=b"bogus-origin-visit", value=value) | producer.produce(topic=topic, key=b"bogus-origin-visit", value=value) | ||||
journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | ||||
broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | ||||
) | ) | ||||
result = invoke( | result = invoke( | ||||
False, | False, | ||||
["journal-client", "objects", "--stop-after-objects", "1",], | ["journal-client", "objects", "--stop-after-objects", "1",], | ||||
journal_objects_config, | journal_objects_config, | ||||
elasticsearch_host=elasticsearch_host, | elasticsearch_host=elasticsearch_host, | ||||
) | ) | ||||
# Check the output | # Check the output | ||||
expected_output = "Processed 1 messages.\nDone.\n" | expected_output = "Processed 1 messages.\nDone.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
swh_search.flush() | swh_search.flush() | ||||
expected_result = { | |||||
"next_page_token": None, | |||||
"results": [{"url": "http://baz.foobar"}], | |||||
} | |||||
# Both searches return the visited origin | # Both searches return the visited origin | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=False) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=False) | ||||
assert results == expected_result | assert actual_page.next_page_token is None | ||||
results = swh_search.origin_search(url_pattern="foobar", with_visit=True) | assert actual_page.results == [origin_foobar] | ||||
assert results == expected_result | |||||
actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | |||||
assert actual_page.next_page_token is None | |||||
assert actual_page.results == [origin_foobar] | |||||
def test__journal_client__missing_main_journal_config_key(elasticsearch_host): | def test__journal_client__missing_main_journal_config_key(elasticsearch_host): | ||||
"""Missing configuration on journal should raise""" | """Missing configuration on journal should raise""" | ||||
with pytest.raises(KeyError, match="journal"): | with pytest.raises(KeyError, match="journal"): | ||||
invoke( | invoke( | ||||
catch_exceptions=False, | catch_exceptions=False, | ||||
args=["journal-client", "objects", "--stop-after-objects", "1",], | args=["journal-client", "objects", "--stop-after-objects", "1",], | ||||
Show All 26 Lines |