diff --git a/swh/scheduler/cli/task.py b/swh/scheduler/cli/task.py --- a/swh/scheduler/cli/task.py +++ b/swh/scheduler/cli/task.py @@ -13,7 +13,9 @@ import csv import click -from typing import Any, Dict +from typing import Any, Dict, Iterable, Iterator +from itertools import islice + from . import cli @@ -285,6 +287,11 @@ click.echo("\n".join(output)) +def take(n: int, iterable: Iterable[Any]) -> Iterator[Any]: + "Lazily yield at most the first n items of the iterable (n must be >= 0)" + yield from islice(iterable, n) + + @task.command("schedule_origins") @click.argument("type", nargs=1, required=True) @click.argument("options", nargs=-1) @@ -345,9 +352,21 @@ if args: raise click.ClickException("Only keywords arguments are allowed.") - origins = iter_origins(storage, origin_from=min_id, origin_to=max_id) - origin_urls = (origin.url for origin in origins) + if min_id is not None: + page_token = str(min_id) # should not do this... don't know better + else: + page_token = None + if max_id is not None and min_id is not None: + limit = max_id - min_id + else: + limit = None + + origins = iter_origins(storage, page_token=page_token) + if limit is not None: + origins = take(limit, origins) + + origin_urls = (origin.url for origin in origins) schedule_origin_batches(scheduler, type, origin_urls, origin_batch_size, kw)