Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_cli.py
Show First 20 Lines • Show All 128 Lines • ▼ Show 20 Lines | def test_mapping_list_terms_exclude(indexer_scheduler): | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert re.search(r'http://schema.org/url:\n.*npm', result.output) | assert re.search(r'http://schema.org/url:\n.*npm', result.output) | ||||
assert not re.search(r'http://schema.org/url:\n.*codemeta', result.output) | assert not re.search(r'http://schema.org/url:\n.*codemeta', result.output) | ||||
assert not re.search( | assert not re.search( | ||||
r'https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta', | r'https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta', | ||||
result.output) | result.output) | ||||
@patch('swh.scheduler.cli.utils.TASK_BATCH_SIZE', 3) | |||||
@patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | @patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | ||||
def test_origin_metadata_reindex_empty_db( | def test_origin_metadata_reindex_empty_db( | ||||
indexer_scheduler, idx_storage, storage): | indexer_scheduler, idx_storage, storage): | ||||
result = invoke(indexer_scheduler, False, [ | result = invoke(indexer_scheduler, False, [ | ||||
'schedule', 'reindex_origin_metadata', | 'schedule', 'reindex_origin_metadata', | ||||
]) | ]) | ||||
expected_output = ( | expected_output = ( | ||||
'Nothing to do (no origin metadata matched the criteria).\n' | 'Nothing to do (no origin metadata matched the criteria).\n' | ||||
) | ) | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
tasks = indexer_scheduler.search_tasks() | tasks = indexer_scheduler.search_tasks() | ||||
assert len(tasks) == 0 | assert len(tasks) == 0 | ||||
@patch('swh.scheduler.cli.utils.TASK_BATCH_SIZE', 3) | |||||
@patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | @patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | ||||
def test_origin_metadata_reindex_divisor( | def test_origin_metadata_reindex_divisor( | ||||
indexer_scheduler, idx_storage, storage): | indexer_scheduler, idx_storage, storage): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
fill_idx_storage(idx_storage, 90) | fill_idx_storage(idx_storage, 90) | ||||
result = invoke(indexer_scheduler, False, [ | result = invoke(indexer_scheduler, False, [ | ||||
Show All 11 Lines | def test_origin_metadata_reindex_divisor( | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
# Check scheduled tasks | # Check scheduled tasks | ||||
tasks = indexer_scheduler.search_tasks() | tasks = indexer_scheduler.search_tasks() | ||||
assert len(tasks) == 9 | assert len(tasks) == 9 | ||||
_assert_tasks_for_origins(tasks, range(90)) | _assert_tasks_for_origins(tasks, range(90)) | ||||
@patch('swh.scheduler.cli.utils.TASK_BATCH_SIZE', 3) | |||||
@patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | @patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | ||||
def test_origin_metadata_reindex_dry_run( | def test_origin_metadata_reindex_dry_run( | ||||
indexer_scheduler, idx_storage, storage): | indexer_scheduler, idx_storage, storage): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
fill_idx_storage(idx_storage, 90) | fill_idx_storage(idx_storage, 90) | ||||
result = invoke(indexer_scheduler, False, [ | result = invoke(indexer_scheduler, False, [ | ||||
Show All 10 Lines | def test_origin_metadata_reindex_dry_run( | ||||
assert result.exit_code == 0, result.output | assert result.exit_code == 0, result.output | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
# Check scheduled tasks | # Check scheduled tasks | ||||
tasks = indexer_scheduler.search_tasks() | tasks = indexer_scheduler.search_tasks() | ||||
assert len(tasks) == 0 | assert len(tasks) == 0 | ||||
@patch('swh.scheduler.cli.utils.TASK_BATCH_SIZE', 3) | |||||
@patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | @patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | ||||
def test_origin_metadata_reindex_nondivisor( | def test_origin_metadata_reindex_nondivisor( | ||||
indexer_scheduler, idx_storage, storage): | indexer_scheduler, idx_storage, storage): | ||||
"""Tests the re-indexing when neither origin_batch_size or | """Tests the re-indexing when neither origin_batch_size or | ||||
task_batch_size is a divisor of nb_origins.""" | task_batch_size is a divisor of nb_origins.""" | ||||
fill_idx_storage(idx_storage, 70) | fill_idx_storage(idx_storage, 70) | ||||
result = invoke(indexer_scheduler, False, [ | result = invoke(indexer_scheduler, False, [ | ||||
Show All 11 Lines | def test_origin_metadata_reindex_nondivisor( | ||||
assert result.output == expected_output | assert result.output == expected_output | ||||
# Check scheduled tasks | # Check scheduled tasks | ||||
tasks = indexer_scheduler.search_tasks() | tasks = indexer_scheduler.search_tasks() | ||||
assert len(tasks) == 4 | assert len(tasks) == 4 | ||||
_assert_tasks_for_origins(tasks, range(70)) | _assert_tasks_for_origins(tasks, range(70)) | ||||
@patch('swh.scheduler.cli.utils.TASK_BATCH_SIZE', 3) | |||||
@patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | @patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | ||||
def test_origin_metadata_reindex_filter_one_mapping( | def test_origin_metadata_reindex_filter_one_mapping( | ||||
indexer_scheduler, idx_storage, storage): | indexer_scheduler, idx_storage, storage): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
fill_idx_storage(idx_storage, 110) | fill_idx_storage(idx_storage, 110) | ||||
result = invoke(indexer_scheduler, False, [ | result = invoke(indexer_scheduler, False, [ | ||||
Show All 12 Lines | def test_origin_metadata_reindex_filter_one_mapping( | ||||
# Check scheduled tasks | # Check scheduled tasks | ||||
tasks = indexer_scheduler.search_tasks() | tasks = indexer_scheduler.search_tasks() | ||||
assert len(tasks) == 2 | assert len(tasks) == 2 | ||||
_assert_tasks_for_origins( | _assert_tasks_for_origins( | ||||
tasks, | tasks, | ||||
[1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101]) | [1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101]) | ||||
@patch('swh.scheduler.cli.utils.TASK_BATCH_SIZE', 3) | |||||
@patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | @patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | ||||
def test_origin_metadata_reindex_filter_two_mappings( | def test_origin_metadata_reindex_filter_two_mappings( | ||||
indexer_scheduler, idx_storage, storage): | indexer_scheduler, idx_storage, storage): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
fill_idx_storage(idx_storage, 110) | fill_idx_storage(idx_storage, 110) | ||||
result = invoke(indexer_scheduler, False, [ | result = invoke(indexer_scheduler, False, [ | ||||
Show All 13 Lines | def test_origin_metadata_reindex_filter_two_mappings( | ||||
tasks = indexer_scheduler.search_tasks() | tasks = indexer_scheduler.search_tasks() | ||||
assert len(tasks) == 3 | assert len(tasks) == 3 | ||||
_assert_tasks_for_origins( | _assert_tasks_for_origins( | ||||
tasks, | tasks, | ||||
[1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, | [1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, | ||||
2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 102]) | 2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 102]) | ||||
@patch('swh.scheduler.cli.utils.TASK_BATCH_SIZE', 3) | |||||
@patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | @patch('swh.scheduler.cli_utils.TASK_BATCH_SIZE', 3) | ||||
def test_origin_metadata_reindex_filter_one_tool( | def test_origin_metadata_reindex_filter_one_tool( | ||||
indexer_scheduler, idx_storage, storage): | indexer_scheduler, idx_storage, storage): | ||||
"""Tests the re-indexing when origin_batch_size*task_batch_size is a | """Tests the re-indexing when origin_batch_size*task_batch_size is a | ||||
divisor of nb_origins.""" | divisor of nb_origins.""" | ||||
tool_ids = fill_idx_storage(idx_storage, 110) | tool_ids = fill_idx_storage(idx_storage, 110) | ||||
result = invoke(indexer_scheduler, False, [ | result = invoke(indexer_scheduler, False, [ | ||||
Show All 19 Lines |