Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_cli.py
# Copyright (C) 2019-2022 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
from functools import reduce | from functools import reduce | ||||
import re | import re | ||||
from typing import Any, Dict, List | from typing import Any, Dict, List | ||||
from unittest.mock import patch | from unittest.mock import patch | ||||
import attr | import attr | ||||
from click.testing import CliRunner | from click.testing import CliRunner | ||||
from confluent_kafka import Consumer | from confluent_kafka import Consumer | ||||
import pytest | import pytest | ||||
from swh.indexer import fossology_license | |||||
from swh.indexer.cli import indexer_cli_group | from swh.indexer.cli import indexer_cli_group | ||||
from swh.indexer.storage.interface import IndexerStorageInterface | from swh.indexer.storage.interface import IndexerStorageInterface | ||||
from swh.indexer.storage.model import ( | from swh.indexer.storage.model import ( | ||||
ContentLicenseRow, | |||||
ContentMimetypeRow, | ContentMimetypeRow, | ||||
DirectoryIntrinsicMetadataRow, | DirectoryIntrinsicMetadataRow, | ||||
OriginExtrinsicMetadataRow, | OriginExtrinsicMetadataRow, | ||||
OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
) | ) | ||||
from swh.journal.writer import get_journal_writer | from swh.journal.writer import get_journal_writer | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Content, Origin, OriginVisitStatus | from swh.model.model import Content, Origin, OriginVisitStatus | ||||
from .test_metadata import REMD | from .test_metadata import REMD | ||||
from .utils import DIRECTORY2, RAW_CONTENTS, REVISION | from .utils import ( | ||||
DIRECTORY2, | |||||
RAW_CONTENT_IDS, | |||||
RAW_CONTENTS, | |||||
REVISION, | |||||
SHA1_TO_LICENSES, | |||||
mock_compute_license, | |||||
) | |||||
def fill_idx_storage(idx_storage: IndexerStorageInterface, nb_rows: int) -> List[int]: | def fill_idx_storage(idx_storage: IndexerStorageInterface, nb_rows: int) -> List[int]: | ||||
tools: List[Dict[str, Any]] = [ | tools: List[Dict[str, Any]] = [ | ||||
{ | { | ||||
"tool_name": "tool %d" % i, | "tool_name": "tool %d" % i, | ||||
"tool_version": "0.0.1", | "tool_version": "0.0.1", | ||||
"tool_configuration": {}, | "tool_configuration": {}, | ||||
▲ Show 20 Lines • Show All 742 Lines • ▼ Show 20 Lines | for content_id, (raw_content, mimetypes, encoding) in RAW_CONTENTS.items(): | ||||
encoding=encoding, | encoding=encoding, | ||||
) | ) | ||||
for mimetype in all_mimetypes | for mimetype in all_mimetypes | ||||
] | ] | ||||
) | ) | ||||
assert len(contents) == len(RAW_CONTENTS) | assert len(contents) == len(RAW_CONTENTS) | ||||
storage.content_add(contents) | |||||
ardumont: (It's already done by the fixture storage ^) | |||||
journal_writer.write_additions("content", contents) | journal_writer.write_additions("content", contents) | ||||
result = cli_runner.invoke( | result = cli_runner.invoke( | ||||
indexer_cli_group, | indexer_cli_group, | ||||
[ | [ | ||||
"-C", | "-C", | ||||
swh_config, | swh_config, | ||||
"journal-client", | "journal-client", | ||||
Show All 11 Lines | ): | ||||
) | ) | ||||
# Check the output | # Check the output | ||||
expected_output = "Done.\n" | expected_output = "Done.\n" | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
results = idx_storage.content_mimetype_get(content_ids) | results = idx_storage.content_mimetype_get(content_ids) | ||||
assert len(results) > 0 | assert len(results) == len(contents) | ||||
for result in results: | |||||
assert result in expected_results | |||||
def test_cli_journal_client_index__fossology_license(
    cli_runner,
    swh_config,
    kafka_prefix: str,
    kafka_server,
    consumer: Consumer,
    idx_storage,
    obj_storage,
    storage,
    mocker,
    swh_indexer_config,
):
    """Test the 'swh indexer journal-client' cli tool for fossology licenses.

    The test writes one ``Content`` object per ``RAW_CONTENTS`` entry to the
    journal, runs ``journal-client content-fossology-license`` until that many
    objects have been consumed, and then checks that the rows returned by
    ``idx_storage.content_fossology_license_get`` are the ones derived from
    ``SHA1_TO_LICENSES``.

    ``consumer``, ``obj_storage`` and ``storage`` are not referenced in the
    body; presumably they are requested for the setup they perform (e.g.
    contents already present in storage) -- confirm against the fixtures.
    """
    # Stub out the license computation. Going through ``mocker`` (instead of
    # assigning the module attribute directly) restores the real function at
    # teardown, so the stub cannot leak into other tests.
    mocker.patch.object(fossology_license, "compute_license", mock_compute_license)

    journal_writer = get_journal_writer(
        "kafka",
        brokers=[kafka_server],
        prefix=kafka_prefix,
        client_id="test producer",
        value_sanitizer=lambda object_type, value: value,
        flush_timeout=3,  # fail early if something is going wrong
    )

    tool = {"id": 1, **swh_indexer_config["tools"]}

    contents = []
    content_ids = []
    expected_results = []
    for content_id, (raw_content, _, _) in RAW_CONTENTS.items():
        content = Content.from_data(raw_content)
        # Sanity check: the fixture key must be the sha1 of the raw data.
        assert content_id == content.sha1

        contents.append(content)
        content_ids.append(content_id)
        # One expected row per license associated with this content.
        expected_results.extend(
            ContentLicenseRow(id=content_id, tool=tool, license=license)
            for license in SHA1_TO_LICENSES[content_id]
        )

    assert len(contents) == len(RAW_CONTENTS)

    journal_writer.write_additions("content", contents)

    result = cli_runner.invoke(
        indexer_cli_group,
        [
            "-C",
            swh_config,
            "journal-client",
            "content-fossology-license",
            "--broker",
            kafka_server,
            "--prefix",
            kafka_prefix,
            "--group-id",
            "test-consumer",
            "--stop-after-objects",
            # Command-line arguments are strings; the option parser converts.
            str(len(contents)),
        ],
        catch_exceptions=False,
    )

    # Check the output
    expected_output = "Done.\n"
    assert result.exit_code == 0, result.output
    assert result.output == expected_output

    results = idx_storage.content_fossology_license_get(content_ids)
    # Same count plus membership: no row is missing and none is unexpected.
    assert len(results) == len(expected_results)
    for result in results:
        assert result in expected_results
ardumont: (The `storage.content_add(contents)` call is unnecessary; it's already done by the `storage` fixture.)