Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_cli.py
Show First 20 Lines • Show All 106 Lines • ▼ Show 20 Lines | ): | ||||
# It's an origin with no visit, searching for it with visit | # It's an origin with no visit, searching for it with visit | ||||
actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | ||||
# returns nothing | # returns nothing | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [] | assert actual_page.results == [] | ||||
def test__journal_client__origin_visit(
    swh_search, elasticsearch_host, kafka_prefix: str, kafka_server
):
    """Subscribing to origin_visit should index the origin, but not as visited.

    A single ``origin_visit`` message is published to Kafka and consumed with
    the ``journal-client objects`` command. Afterwards the origin must be
    returned by a plain search, and must not be returned by a search
    restricted with ``with_visit=True``.
    """
    origin_foobar = {"url": "http://baz.foobar"}

    producer = Producer(
        {
            "bootstrap.servers": kafka_server,
            "client.id": "test search origin visit producer",
            "acks": "all",
        }
    )
    topic = f"{kafka_prefix}.origin_visit"
    value = value_to_kafka({"origin": origin_foobar["url"], "type": "git"})
    producer.produce(topic=topic, key=b"bogus-origin-visit", value=value)
    # produce() only enqueues the message; wait for delivery so the journal
    # client below is guaranteed to find it on the topic.
    producer.flush()

    journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format(
        broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer"
    )
    result = invoke(
        False,
        [
            "journal-client",
            "objects",
            "--stop-after-objects",
            "1",
            "--object-type",
            "origin_visit",
        ],
        journal_objects_config,
        elasticsearch_host=elasticsearch_host,
    )

    # Check the output
    expected_output = "Processed 1 messages.\nDone.\n"
    assert result.exit_code == 0, result.output
    assert result.output == expected_output

    # Flush the search backend before querying it
    swh_search.flush()

    # The origin is found by a search that does not filter on visits
    actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=False)
    assert actual_page.next_page_token is None
    assert actual_page.results == [origin_foobar]

    # Not considered visited unless the visit is full
    actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True)
    assert actual_page.next_page_token is None
    assert actual_page.results == []
def test__journal_client__origin_visit_status( | def test__journal_client__origin_visit_status( | ||||
swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | ||||
): | ): | ||||
"""Subscribing to origin-visit-status should result in swh-search indexation | """Subscribing to origin-visit-status should result in swh-search indexation | ||||
""" | """ | ||||
origin_foobar = {"url": "http://baz.foobar"} | origin_foobar = {"url": "http://baz.foobar"} | ||||
producer = Producer( | producer = Producer( | ||||
{ | { | ||||
"bootstrap.servers": kafka_server, | "bootstrap.servers": kafka_server, | ||||
"client.id": "test search origin visit status producer", | "client.id": "test search origin visit status producer", | ||||
"acks": "all", | "acks": "all", | ||||
} | } | ||||
) | ) | ||||
topic = f"{kafka_prefix}.origin_visit_status" | topic = f"{kafka_prefix}.origin_visit_status" | ||||
value = value_to_kafka( | value = value_to_kafka( | ||||
{ | { | ||||
"origin": origin_foobar["url"], | "origin": origin_foobar["url"], | ||||
"visit": 1, | "visit": 1, | ||||
"type": "git", | |||||
"date": datetime.now(tz=timezone.utc), | "date": datetime.now(tz=timezone.utc), | ||||
"snapshot": None, | "snapshot": None, | ||||
"status": "full", | "status": "full", | ||||
} | } | ||||
) | ) | ||||
producer.produce(topic=topic, key=b"bogus-origin-visit-status", value=value) | producer.produce(topic=topic, key=b"bogus-origin-visit-status", value=value) | ||||
journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | ||||
▲ Show 20 Lines • Show All 252 Lines • Show Last 20 Lines |