Changeset View
Standalone View
swh/scheduler/cli/task.py
# Copyright (C) 2016-2020 The Software Heritage developers | # Copyright (C) 2016-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import json | import json | ||||
import itertools | import itertools | ||||
import locale | import locale | ||||
import logging | import logging | ||||
import arrow | import arrow | ||||
import csv | import csv | ||||
import click | import click | ||||
from typing import Any, Dict | from typing import Any, Dict, Iterable, Iterator | ||||
from itertools import islice | |||||
from . import cli | from . import cli | ||||
locale.setlocale(locale.LC_ALL, "") | locale.setlocale(locale.LC_ALL, "") | ||||
ARROW_LOCALE = locale.getlocale(locale.LC_TIME)[0] | ARROW_LOCALE = locale.getlocale(locale.LC_TIME)[0] | ||||
▲ Show 20 Lines • Show All 255 Lines • ▼ Show 20 Lines | output = [ | ||||
"Created %d tasks\n" % len(created), | "Created %d tasks\n" % len(created), | ||||
] | ] | ||||
for task in created: | for task in created: | ||||
output.append(pretty_print_task(task)) | output.append(pretty_print_task(task)) | ||||
click.echo("\n".join(output)) | click.echo("\n".join(output)) | ||||
def take(n: int, iterable: Iterable[Any]) -> Iterator[Any]:
    """Lazily yield at most the first ``n`` items of ``iterable``.

    This is a generator: nothing is consumed from ``iterable`` until the
    result is iterated, and no intermediate list is built.

    Args:
        n: maximum number of items to yield
        iterable: source of the items

    Yields:
        The items of ``iterable``, in order, stopping after ``n`` items or
        when ``iterable`` is exhausted, whichever comes first.

    """
    # islice does the bounded iteration; delegating with `yield from` keeps
    # this function a generator.
    yield from islice(iterable, n)
vlorentz: why not use islice directly? | |||||
ardumont (Author) [Unsubmitted, Done]: Yeah, I initially took it from the itertools recipes in the documentation [1] and then thought of using the generator directly... [1] https://docs.python.org/3/library/itertools.html#itertools-recipes | |||||
vlorentz [Not Done]: "Eventually" does not mean "éventuellement" but "finalement" ;) Use "optionally" instead. | |||||
@task.command("schedule_origins") | @task.command("schedule_origins") | ||||
@click.argument("type", nargs=1, required=True) | @click.argument("type", nargs=1, required=True) | ||||
@click.argument("options", nargs=-1) | @click.argument("options", nargs=-1) | ||||
@click.option( | @click.option( | ||||
"--batch-size", | "--batch-size", | ||||
"-b", | "-b", | ||||
"origin_batch_size", | "origin_batch_size", | ||||
default=10, | default=10, | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | ): | ||||
storage = get_storage("remote", url=storage_url) | storage = get_storage("remote", url=storage_url) | ||||
if dry_run: | if dry_run: | ||||
scheduler = None | scheduler = None | ||||
(args, kw) = parse_options(options) | (args, kw) = parse_options(options) | ||||
if args: | if args: | ||||
raise click.ClickException("Only keywords arguments are allowed.") | raise click.ClickException("Only keywords arguments are allowed.") | ||||
origins = iter_origins(storage, origin_from=min_id, origin_to=max_id) | if min_id is not None: | ||||
origin_urls = (origin.url for origin in origins) | page_token = str(min_id) # should not do this... don't know better | ||||
else: | |||||
page_token = None | |||||
vlorentz [Unsubmitted, Not Done]: I don't understand the issue. Just change the arguments to take `page_token` as an argument instead of `min_id`. | |||||
ardumont (Author) [Unsubmitted, Done]: Well, for one, `iter_origins` does not return any page_token after that... | |||||
vlorentz [Unsubmitted, Not Done]: Hmmm... We could write a variant of `iter_origins` that yields tokens in addition to the origins. | |||||
ardumont (Author) [Unsubmitted, Done]: Or drop `iter_origins` and use `storage.origin_list` directly, which does just that...? | |||||
vlorentz [Unsubmitted, Not Done]: Yeah, indeed. | |||||
ardumont (Author) [Unsubmitted, Done]: That would simplify D3681 even further, either by:
- dropping `iter_origins` altogether… | |||||
if max_id is not None and min_id is not None: | |||||
limit = max_id - min_id | |||||
else: | |||||
limit = None | |||||
origins = iter_origins(storage, page_token=page_token) | |||||
if limit: | |||||
origins = take(origins, limit) | |||||
vlorentz [Unsubmitted, Done]: Wrong order of arguments, by the way. | |||||
ardumont (Author) [Unsubmitted, Done]: Because my brain wanted to use islice all along ;) | |||||
origin_urls = (origin.url for origin in origins) | |||||
schedule_origin_batches(scheduler, type, origin_urls, origin_batch_size, kw) | schedule_origin_batches(scheduler, type, origin_urls, origin_batch_size, kw) | ||||
@task.command("list-pending") | @task.command("list-pending") | ||||
@click.argument("task-types", required=True, nargs=-1) | @click.argument("task-types", required=True, nargs=-1) | ||||
@click.option( | @click.option( | ||||
"--limit", | "--limit", | ||||
"-l", | "-l", | ||||
▲ Show 20 Lines • Show All 362 Lines • Show Last 20 Lines |
why not use islice directly?