Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_journal_client.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import Dict, List | |||||
from unittest.mock import patch | |||||
from unittest.mock import Mock, patch | import pytest | ||||
from swh.indexer.journal_client import process_journal_objects | from swh.indexer.journal_client import process_journal_objects | ||||
from swh.scheduler.interface import SchedulerInterface | |||||
def test_one_origin_visit_status(): | def search_tasks(indexer_scheduler: SchedulerInterface, task_type) -> List[Dict]: | ||||
mock_scheduler = Mock() | tasks = indexer_scheduler.search_tasks(task_type=task_type) | ||||
messages = { | |||||
vlorentz: this function doesn't compare; it only excludes keys from its result | |||||
Done Inline Actionsyes, to avoid the comparison which would not work (well for some fields at least [1]) for those fields so I just wanted it to make it clear... any better name? [1] id is generated, ... ardumont: yes, to avoid the comparison which would not work (well for some fields at least [1]) for those… | |||||
"origin_visit_status": [{"status": "full", "origin": "file:///dev/zero",},] | keys_not_to_compare = ["next_run", "current_interval", "id", "priority", "status"] | ||||
} | |||||
process_journal_objects( | result_tasks = [] | ||||
messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"}, | for task in tasks: | ||||
) | task = dict(task) | ||||
assert mock_scheduler.create_tasks.called is True | |||||
call_args = mock_scheduler.create_tasks.call_args | for key in keys_not_to_compare: | ||||
(args, kwargs) = call_args | del task[key] | ||||
assert kwargs == {} | |||||
del args[0][0]["next_run"] | result_tasks.append(task) | ||||
assert args == ( | |||||
return result_tasks | |||||
@pytest.mark.parametrize( | |||||
"origin", | |||||
[ | [ | ||||
{ | "file:///dev/zero", # current format | ||||
"arguments": {"kwargs": {}, "args": (["file:///dev/zero"],),}, | {"url": "file:///dev/zero",}, # legacy format | ||||
"policy": "oneshot", | |||||
"type": "task-name", | |||||
"retries_left": 1, | |||||
}, | |||||
], | ], | ||||
) | ) | ||||
def test_journal_client_origin_visit_status(origin, indexer_scheduler): | |||||
messages = {"origin_visit_status": [{"status": "full", "origin": origin},]} | |||||
def test_origin_visit_legacy(): | |||||
mock_scheduler = Mock() | |||||
messages = { | |||||
"origin_visit_status": [ | |||||
{"status": "full", "origin": {"url": "file:///dev/zero",}}, | |||||
] | |||||
} | |||||
process_journal_objects( | process_journal_objects( | ||||
messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"}, | messages, | ||||
scheduler=indexer_scheduler, | |||||
task_names={"origin_metadata": "index-origin-metadata"}, | |||||
) | ) | ||||
assert mock_scheduler.create_tasks.called is True | actual_tasks = search_tasks(indexer_scheduler, task_type="index-origin-metadata") | ||||
call_args = mock_scheduler.create_tasks.call_args | |||||
(args, kwargs) = call_args | assert actual_tasks == [ | ||||
assert kwargs == {} | |||||
del args[0][0]["next_run"] | |||||
assert args == ( | |||||
[ | |||||
{ | { | ||||
"arguments": {"kwargs": {}, "args": (["file:///dev/zero"],),}, | "arguments": {"kwargs": {}, "args": [["file:///dev/zero"]],}, | ||||
"policy": "oneshot", | "policy": "oneshot", | ||||
"type": "task-name", | "type": "index-origin-metadata", | ||||
"retries_left": 1, | "retries_left": 1, | ||||
}, | } | ||||
], | ] | ||||
) | |||||
def test_one_origin_visit_batch(): | def test_journal_client_one_origin_visit_batch(indexer_scheduler): | ||||
mock_scheduler = Mock() | |||||
messages = { | messages = { | ||||
"origin_visit_status": [ | "origin_visit_status": [ | ||||
{"status": "full", "origin": "file:///dev/zero",}, | {"status": "full", "origin": "file:///dev/zero",}, | ||||
{"status": "full", "origin": "file:///tmp/foobar",}, | {"status": "full", "origin": "file:///tmp/foobar",}, | ||||
] | ] | ||||
} | } | ||||
process_journal_objects( | process_journal_objects( | ||||
messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"}, | messages, | ||||
scheduler=indexer_scheduler, | |||||
task_names={"origin_metadata": "index-origin-metadata"}, | |||||
) | ) | ||||
assert mock_scheduler.create_tasks.called is True | |||||
call_args = mock_scheduler.create_tasks.call_args | actual_tasks = search_tasks(indexer_scheduler, task_type="index-origin-metadata") | ||||
(args, kwargs) = call_args | assert actual_tasks == [ | ||||
assert kwargs == {} | |||||
del args[0][0]["next_run"] | |||||
assert args == ( | |||||
[ | |||||
{ | { | ||||
"arguments": { | "arguments": { | ||||
"kwargs": {}, | "kwargs": {}, | ||||
"args": (["file:///dev/zero", "file:///tmp/foobar"],), | "args": [["file:///dev/zero", "file:///tmp/foobar"]], | ||||
}, | }, | ||||
"policy": "oneshot", | "policy": "oneshot", | ||||
"type": "task-name", | "type": "index-origin-metadata", | ||||
"retries_left": 1, | "retries_left": 1, | ||||
}, | } | ||||
], | ] | ||||
) | |||||
@patch("swh.indexer.journal_client.MAX_ORIGINS_PER_TASK", 2) | @patch("swh.indexer.journal_client.MAX_ORIGINS_PER_TASK", 2) | ||||
def test_origin_visit_batches(): | def test_journal_client_origin_visit_batches(indexer_scheduler): | ||||
mock_scheduler = Mock() | |||||
messages = { | messages = { | ||||
"origin_visit_status": [ | "origin_visit_status": [ | ||||
{"status": "full", "origin": "file:///dev/zero",}, | {"status": "full", "origin": "file:///dev/zero",}, | ||||
{"status": "full", "origin": "file:///tmp/foobar",}, | {"status": "full", "origin": "file:///tmp/foobar",}, | ||||
{"status": "full", "origin": "file:///tmp/spamegg",}, | {"status": "full", "origin": "file:///tmp/spamegg",}, | ||||
] | ] | ||||
} | } | ||||
process_journal_objects( | process_journal_objects( | ||||
messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"}, | messages, | ||||
scheduler=indexer_scheduler, | |||||
task_names={"origin_metadata": "index-origin-metadata"}, | |||||
) | ) | ||||
assert mock_scheduler.create_tasks.called is True | actual_tasks = search_tasks(indexer_scheduler, task_type="index-origin-metadata") | ||||
call_args = mock_scheduler.create_tasks.call_args | assert actual_tasks == [ | ||||
(args, kwargs) = call_args | |||||
assert kwargs == {} | |||||
del args[0][0]["next_run"] | |||||
del args[0][1]["next_run"] | |||||
assert args == ( | |||||
[ | |||||
{ | { | ||||
"arguments": { | "arguments": { | ||||
"kwargs": {}, | "kwargs": {}, | ||||
"args": (["file:///dev/zero", "file:///tmp/foobar"],), | "args": [["file:///dev/zero", "file:///tmp/foobar"],], | ||||
}, | }, | ||||
"policy": "oneshot", | "policy": "oneshot", | ||||
"type": "task-name", | "type": "index-origin-metadata", | ||||
"retries_left": 1, | "retries_left": 1, | ||||
}, | }, | ||||
{ | { | ||||
"arguments": {"kwargs": {}, "args": (["file:///tmp/spamegg"],),}, | "arguments": {"kwargs": {}, "args": [["file:///tmp/spamegg"]],}, | ||||
"policy": "oneshot", | "policy": "oneshot", | ||||
"type": "task-name", | "type": "index-origin-metadata", | ||||
"retries_left": 1, | "retries_left": 1, | ||||
}, | }, | ||||
], | ] | ||||
) |
this function doesn't compare; it only excludes keys from its result