Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_cli.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
from functools import reduce | from functools import reduce | ||||
import re | import re | ||||
from typing import Any, Dict, List | from typing import Any, Dict, List | ||||
from unittest.mock import patch | from unittest.mock import patch | ||||
import attr | |||||
from click.testing import CliRunner | from click.testing import CliRunner | ||||
from confluent_kafka import Consumer | from confluent_kafka import Consumer | ||||
import pytest | import pytest | ||||
from swh.indexer.cli import indexer_cli_group | from swh.indexer.cli import indexer_cli_group | ||||
from swh.indexer.storage.interface import IndexerStorageInterface | from swh.indexer.storage.interface import IndexerStorageInterface | ||||
from swh.indexer.storage.model import ( | from swh.indexer.storage.model import ( | ||||
DirectoryIntrinsicMetadataRow, | DirectoryIntrinsicMetadataRow, | ||||
OriginExtrinsicMetadataRow, | |||||
OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
) | ) | ||||
from swh.journal.writer import get_journal_writer | from swh.journal.writer import get_journal_writer | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import OriginVisitStatus | from swh.model.model import Origin, OriginVisitStatus | ||||
from .test_metadata import REMD | |||||
from .utils import DIRECTORY2, REVISION | from .utils import DIRECTORY2, REVISION | ||||
def fill_idx_storage(idx_storage: IndexerStorageInterface, nb_rows: int) -> List[int]: | def fill_idx_storage(idx_storage: IndexerStorageInterface, nb_rows: int) -> List[int]: | ||||
tools: List[Dict[str, Any]] = [ | tools: List[Dict[str, Any]] = [ | ||||
{ | { | ||||
"tool_name": "tool %d" % i, | "tool_name": "tool %d" % i, | ||||
"tool_version": "0.0.1", | "tool_version": "0.0.1", | ||||
▲ Show 20 Lines • Show All 492 Lines • ▼ Show 20 Lines | with pytest.raises(ValueError, match="brokers"): | ||||
swh_config, | swh_config, | ||||
"journal-client", | "journal-client", | ||||
], | ], | ||||
catch_exceptions=False, | catch_exceptions=False, | ||||
) | ) | ||||
@pytest.mark.parametrize("indexer_name", ["origin-intrinsic-metadata", "*"]) | @pytest.mark.parametrize("indexer_name", ["origin-intrinsic-metadata", "*"]) | ||||
def test_cli_journal_client_index( | def test_cli_journal_client_index__origin_intrinsic_metadata( | ||||
cli_runner, | cli_runner, | ||||
swh_config, | swh_config, | ||||
kafka_prefix: str, | kafka_prefix: str, | ||||
kafka_server, | kafka_server, | ||||
consumer: Consumer, | consumer: Consumer, | ||||
idx_storage, | idx_storage, | ||||
storage, | storage, | ||||
mocker, | mocker, | ||||
▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines | expected_results = [ | ||||
from_directory=DIRECTORY2.id, | from_directory=DIRECTORY2.id, | ||||
tool={"id": 1, **swh_indexer_config["tools"]}, | tool={"id": 1, **swh_indexer_config["tools"]}, | ||||
mappings=["cff"], | mappings=["cff"], | ||||
metadata={"foo": "bar"}, | metadata={"foo": "bar"}, | ||||
) | ) | ||||
for status in sorted(visit_statuses_full, key=lambda r: r.origin) | for status in sorted(visit_statuses_full, key=lambda r: r.origin) | ||||
] | ] | ||||
assert sorted(results, key=lambda r: r.id) == expected_results | assert sorted(results, key=lambda r: r.id) == expected_results | ||||
@pytest.mark.parametrize("indexer_name", ["extrinsic-metadata", "*"])
def test_cli_journal_client_index__origin_extrinsic_metadata(
    cli_runner,
    swh_config,
    kafka_prefix: str,
    kafka_server,
    consumer: Consumer,
    idx_storage,
    storage,
    mocker,
    swh_indexer_config,
    indexer_name: str,
):
    """Test 'swh indexer journal-client' on raw extrinsic metadata objects.

    A single ``raw_extrinsic_metadata`` object targeting a freshly added
    origin is written to the journal; the CLI is then run with
    ``indexer_name`` (either the dedicated indexer or the ``*`` wildcard) and
    asked to stop after one object. The test finally checks that the indexer
    storage holds the expected ``OriginExtrinsicMetadataRow`` for that origin.
    """
    journal_writer = get_journal_writer(
        "kafka",
        brokers=[kafka_server],
        prefix=kafka_prefix,
        client_id="test producer",
        value_sanitizer=lambda object_type, value: value,
        flush_timeout=3,  # fail early if something is going wrong
    )

    origin = Origin("http://example.org/repo.git")
    storage.origin_add([origin])

    # Retarget the shared REMD fixture at our origin, then recompute its id:
    # the id is derived from the object's content, so it must be refreshed
    # after the target changed.
    raw_extrinsic_metadata = attr.evolve(REMD, target=origin.swhid())
    raw_extrinsic_metadata = attr.evolve(
        raw_extrinsic_metadata, id=raw_extrinsic_metadata.compute_hash()
    )
    journal_writer.write_additions("raw_extrinsic_metadata", [raw_extrinsic_metadata])

    result = cli_runner.invoke(
        indexer_cli_group,
        [
            "-C",
            swh_config,
            "journal-client",
            indexer_name,
            "--broker",
            kafka_server,
            "--prefix",
            kafka_prefix,
            "--group-id",
            "test-consumer",
            "--stop-after-objects",
            # Command-line arguments are strings; click converts the value
            # to an int itself.
            "1",
        ],
        catch_exceptions=False,
    )

    # Check the output
    expected_output = "Done.\n"
    assert result.exit_code == 0, result.output
    assert result.output == expected_output

    # Check the row written to the indexer storage.
    results = idx_storage.origin_extrinsic_metadata_get([origin.url])
    expected_results = [
        OriginExtrinsicMetadataRow(
            id=origin.url,
            from_remd_id=raw_extrinsic_metadata.id,
            tool={"id": 1, **swh_indexer_config["tools"]},
            # NOTE(review): presumably REMD carries GitHub-style metadata,
            # hence the "github" mapping -- confirm against test_metadata.
            mappings=["github"],
            metadata={
                "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                "type": "https://forgefed.org/ns#Repository",
                "name": "test software",
            },
        )
    ]
    assert sorted(results, key=lambda r: r.id) == expected_results