diff --git a/swh/scheduler/cli/task.py b/swh/scheduler/cli/task.py --- a/swh/scheduler/cli/task.py +++ b/swh/scheduler/cli/task.py @@ -14,6 +14,8 @@ import click from typing import Any, Dict +from itertools import islice + from . import cli @@ -298,17 +300,17 @@ help="Number of origins per task", ) @click.option( - "--min-id", + "--page-token", default=0, show_default=True, - type=int, + type=str, help="Only schedule tasks for origins whose ID is greater", ) @click.option( - "--max-id", + "--limit", default=None, type=int, - help="Only schedule tasks for origins whose ID is lower", + help="Limit the scheduling to at most this number of origins", ) @click.option("--storage-url", "-g", help="URL of the (graph) storage API") @click.option( @@ -319,7 +321,7 @@ ) @click.pass_context def schedule_origin_metadata_index( - ctx, type, options, storage_url, origin_batch_size, min_id, max_id, dry_run + ctx, type, options, storage_url, origin_batch_size, page_token, limit, dry_run ): """Schedules tasks for origins that are already known. 
@@ -345,9 +347,11 @@ if args: raise click.ClickException("Only keywords arguments are allowed.") - origins = iter_origins(storage, origin_from=min_id, origin_to=max_id) - origin_urls = (origin.url for origin in origins) + origins = iter_origins(storage, page_token=page_token) + if limit: + origins = islice(origins, limit) + origin_urls = (origin.url for origin in origins) schedule_origin_batches(scheduler, type, origin_urls, origin_batch_size, kw) diff --git a/swh/scheduler/tests/test_cli.py b/swh/scheduler/tests/test_cli.py --- a/swh/scheduler/tests/test_cli.py +++ b/swh/scheduler/tests/test_cli.py @@ -707,3 +707,66 @@ task["arguments"]["kwargs"] == {"key1": "value1", "key2": "value2"} for task in tasks ) + + +def test_task_schedule_origins_with_limit(swh_scheduler, storage): + """Tests that --limit caps the number of origins that get scheduled.""" + origins = _fill_storage_with_origins(storage, 50) + + limit = 20 + result = invoke( + swh_scheduler, + False, + [ + "task", + "schedule_origins", + "swh-test-ping", + "--batch-size", + "5", + "--limit", + str(limit), + ], + ) + + # Check the output + expected = r""" +Scheduled 4 tasks \(20 origins\). +Done. +""".lstrip() + assert result.exit_code == 0, result.output + assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output) + + # Check tasks + tasks = swh_scheduler.search_tasks() + _assert_origin_tasks_contraints(tasks, 10, 5, origins[:limit]) + + +def test_task_schedule_origins_with_page_token(swh_scheduler, storage): + """Tests that --page-token skips the origins listed before the token.""" + origins = _fill_storage_with_origins(storage, 50) + + result = invoke( + swh_scheduler, + False, + [ + "task", + "schedule_origins", + "swh-test-ping", + "--batch-size", + "5", + "--page-token", + "11", + ], + ) + + # Check the output + expected = r""" +Scheduled 8 tasks \(40 origins\). +Done. 
+""".lstrip() + assert result.exit_code == 0, result.output + assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output) + + # Check tasks + tasks = swh_scheduler.search_tasks() + _assert_origin_tasks_contraints(tasks, 10, 5, origins[10:])