Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_cli.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import tempfile | import tempfile | ||||
from click.testing import CliRunner | from click.testing import CliRunner | ||||
from confluent_kafka import Producer | from confluent_kafka import Producer | ||||
import pytest | import pytest | ||||
from typing_extensions import Literal | |||||
import yaml | import yaml | ||||
from swh.journal.serializers import value_to_kafka | from swh.journal.serializers import value_to_kafka | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.search import get_search | from swh.search import get_search | ||||
from swh.search.cli import search_cli_group | from swh.search.cli import search_cli_group | ||||
CLI_CONFIG = """ | CLI_CONFIG = """ | ||||
▲ Show 20 Lines • Show All 149 Lines • ▼ Show 20 Lines | ): | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin_foobar] | assert actual_page.results == [origin_foobar] | ||||
actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = swh_search.origin_search(url_pattern="foobar", with_visit=True) | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin_foobar] | assert actual_page.results == [origin_foobar] | ||||
def test__journal_client__origin_intrinsic_metadata( | @pytest.mark.parametrize("metadata_source", ["intrinsic", "extrinsic"]) | ||||
swh_search, elasticsearch_host, kafka_prefix: str, kafka_server | def test__journal_client__origin_metadata( | ||||
swh_search, | |||||
elasticsearch_host, | |||||
kafka_prefix: str, | |||||
kafka_server, | |||||
metadata_source: Literal["intrinsic", "extrinsic"], | |||||
): | ): | ||||
"""Subscribing to origin-intrinsic-metadata should result in swh-search indexation""" | """Subscribing to origin-intrinsic-metadata or origin-extrinsic-metadata should result in swh-search indexation""" | ||||
origin_foobar = {"url": "https://github.com/clojure/clojure"} | origin_foobar = {"url": "https://github.com/clojure/clojure"} | ||||
origin_intrinsic_metadata = { | origin_metadata = { | ||||
"id": origin_foobar["url"], | "id": origin_foobar["url"], | ||||
"metadata": { | "metadata": { | ||||
"name": "clojure", | "name": "clojure", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"license": "http://opensource.org/licenses/eclipse-1.0.php", | "license": "http://opensource.org/licenses/eclipse-1.0.php", | ||||
"version": "1.10.2-master-SNAPSHOT", | "version": "1.10.2-master-SNAPSHOT", | ||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"identifier": "org.clojure", | "identifier": "org.clojure", | ||||
"description": "Clojure core environment and runtime library.", | "description": "Clojure core environment and runtime library.", | ||||
"codeRepository": "https://repo.maven.apache.org/maven2/org/clojure/clojure", # noqa | "codeRepository": "https://repo.maven.apache.org/maven2/org/clojure/clojure", # noqa | ||||
}, | }, | ||||
"indexer_configuration_id": 1, | "indexer_configuration_id": 1, | ||||
"from_revision": hash_to_bytes("f47c139e20970ee0852166f48ee2a4626632b86e"), | } | ||||
if metadata_source == "intrinsic": | |||||
origin_metadata.update( | |||||
{ | |||||
"from_revision": hash_to_bytes( | |||||
"f47c139e20970ee0852166f48ee2a4626632b86e" | |||||
), | |||||
"mappings": ["maven"], | "mappings": ["maven"], | ||||
} | } | ||||
) | |||||
elif metadata_source == "extrinsic": | |||||
origin_metadata.update( | |||||
{ | |||||
"from_revision": hash_to_bytes( | |||||
"f47c139e20970ee0852166f48ee2a4626632b86e" | |||||
), | |||||
"mappings": ["github"], | |||||
} | |||||
) | |||||
else: | |||||
assert False, metadata_source | |||||
producer = Producer( | producer = Producer( | ||||
{ | { | ||||
"bootstrap.servers": kafka_server, | "bootstrap.servers": kafka_server, | ||||
"client.id": "test search origin intrinsic metadata producer", | "client.id": "test search origin intrinsic metadata producer", | ||||
"acks": "all", | "acks": "all", | ||||
} | } | ||||
) | ) | ||||
topic = f"{kafka_prefix}.origin_intrinsic_metadata" | topic = f"{kafka_prefix}.origin_{metadata_source}_metadata" | ||||
value = value_to_kafka(origin_intrinsic_metadata) | value = value_to_kafka(origin_metadata) | ||||
producer.produce(topic=topic, key=b"bogus-origin-intrinsic-metadata", value=value) | producer.produce(topic=topic, key=b"bogus-origin-metadata", value=value) | ||||
producer.flush() | producer.flush() | ||||
journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | journal_objects_config = JOURNAL_OBJECTS_CONFIG_TEMPLATE.format( | ||||
broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | broker=kafka_server, prefix=kafka_prefix, group_id="test-consumer" | ||||
) | ) | ||||
result = invoke( | result = invoke( | ||||
False, | False, | ||||
[ | [ | ||||
"journal-client", | "journal-client", | ||||
"objects", | "objects", | ||||
"--stop-after-objects", | "--stop-after-objects", | ||||
"1", | "1", | ||||
"--object-type", | "--object-type", | ||||
"origin_intrinsic_metadata", | f"origin_{metadata_source}_metadata", | ||||
], | ], | ||||
journal_objects_config, | journal_objects_config, | ||||
elasticsearch_host=elasticsearch_host, | elasticsearch_host=elasticsearch_host, | ||||
) | ) | ||||
# Check the output | # Check the output | ||||
expected_output = "Processed 1 messages.\nDone.\n" | expected_output = "Processed 1 messages.\nDone.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
▲ Show 20 Lines • Show All 166 Lines • Show Last 20 Lines |