Changeset View
Standalone View
swh/scheduler/cli/task.py
# Copyright (C) 2016-2020 The Software Heritage developers | # Copyright (C) 2016-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import json | import json | ||||
import itertools | import itertools | ||||
import locale | import locale | ||||
import logging | import logging | ||||
import arrow | import arrow | ||||
import csv | import csv | ||||
import click | import click | ||||
from typing import Any, Dict | from typing import Any, Dict | ||||
from itertools import islice | |||||
from . import cli | from . import cli | ||||
locale.setlocale(locale.LC_ALL, "") | locale.setlocale(locale.LC_ALL, "") | ||||
ARROW_LOCALE = locale.getlocale(locale.LC_TIME)[0] | ARROW_LOCALE = locale.getlocale(locale.LC_TIME)[0] | ||||
▲ Show 20 Lines • Show All 254 Lines • ▼ Show 20 Lines | def schedule_task(ctx, type, options, policy, priority, next_run): | ||||
output = [ | output = [ | ||||
"Created %d tasks\n" % len(created), | "Created %d tasks\n" % len(created), | ||||
] | ] | ||||
for task in created: | for task in created: | ||||
output.append(pretty_print_task(task)) | output.append(pretty_print_task(task)) | ||||
click.echo("\n".join(output)) | click.echo("\n".join(output)) | ||||
@task.command("schedule_origins") | @task.command("schedule_origins") | ||||
@click.argument("type", nargs=1, required=True) | @click.argument("type", nargs=1, required=True) | ||||
vlorentz: why not use islice directly? | |||||
Done Inline Actions: Yeah, I initially took it from the itertools recipes documentation [1] and then thought of using it directly on the generator... [1] https://docs.python.org/3/library/itertools.html#itertools-recipes ardumont: yeah, i initially took it from the doc on itertools [1] and then thought to use it directly the… | |||||
@click.argument("options", nargs=-1) | @click.argument("options", nargs=-1) | ||||
Not Done Inline Actions: "Eventually" does not mean "éventuellement", but "finalement" ;) Use "optionally" instead. vlorentz: "Eventually" does not mean "éventuellement", but "finalement" ;)
Use "optionally" instead | |||||
@click.option( | @click.option( | ||||
"--batch-size", | "--batch-size", | ||||
"-b", | "-b", | ||||
"origin_batch_size", | "origin_batch_size", | ||||
default=10, | default=10, | ||||
show_default=True, | show_default=True, | ||||
type=int, | type=int, | ||||
help="Number of origins per task", | help="Number of origins per task", | ||||
) | ) | ||||
@click.option( | @click.option( | ||||
"--min-id", | "--page-token", | ||||
default=0, | default=0, | ||||
show_default=True, | show_default=True, | ||||
type=int, | type=str, | ||||
help="Only schedule tasks for origins whose ID is greater", | help="Only schedule tasks for origins whose ID is greater", | ||||
) | ) | ||||
@click.option( | @click.option( | ||||
"--max-id", | "--limit", | ||||
default=None, | default=None, | ||||
type=int, | type=int, | ||||
help="Only schedule tasks for origins whose ID is lower", | help="Limit the tasks scheduling up to this number of tasks", | ||||
) | ) | ||||
@click.option("--storage-url", "-g", help="URL of the (graph) storage API") | @click.option("--storage-url", "-g", help="URL of the (graph) storage API") | ||||
@click.option( | @click.option( | ||||
"--dry-run/--no-dry-run", | "--dry-run/--no-dry-run", | ||||
is_flag=True, | is_flag=True, | ||||
default=False, | default=False, | ||||
help="List only what would be scheduled.", | help="List only what would be scheduled.", | ||||
) | ) | ||||
@click.pass_context | @click.pass_context | ||||
def schedule_origin_metadata_index( | def schedule_origin_metadata_index( | ||||
ctx, type, options, storage_url, origin_batch_size, min_id, max_id, dry_run | ctx, type, options, storage_url, origin_batch_size, page_token, limit, dry_run | ||||
): | ): | ||||
"""Schedules tasks for origins that are already known. | """Schedules tasks for origins that are already known. | ||||
The first argument is the name of the task type, further ones are | The first argument is the name of the task type, further ones are | ||||
keyword argument(s) of the task in the form key=value, where value is | keyword argument(s) of the task in the form key=value, where value is | ||||
in YAML format. | in YAML format. | ||||
Usage sample: | Usage sample: | ||||
Show All 9 Lines | ): | ||||
storage = get_storage("remote", url=storage_url) | storage = get_storage("remote", url=storage_url) | ||||
if dry_run: | if dry_run: | ||||
scheduler = None | scheduler = None | ||||
(args, kw) = parse_options(options) | (args, kw) = parse_options(options) | ||||
if args: | if args: | ||||
raise click.ClickException("Only keywords arguments are allowed.") | raise click.ClickException("Only keywords arguments are allowed.") | ||||
origins = iter_origins(storage, origin_from=min_id, origin_to=max_id) | origins = iter_origins(storage, page_token=page_token) | ||||
origin_urls = (origin.url for origin in origins) | if limit: | ||||
origins = islice(origins, limit) | |||||
origin_urls = (origin.url for origin in origins) | |||||
Not Done Inline Actions: I don't understand the issue. Just change the signature to take `page_token` as an argument instead of `min_id`. vlorentz: I don't understand the issue. Just change the arguments to take the `page_token` as argument… | |||||
Done Inline Actions: Well, for one, `iter_origins` does not return any page_token after that... ardumont: well for one, `iter_origins` does not return any page_token after that... | |||||
Not Done Inline Actions: Hmmm... We could write a variant of `iter_origins` that yields tokens in addition to the origins. vlorentz: Hmmm...
We could write a variant of `iter_origins` that yields tokens in addition to the… | |||||
Done Inline Actions: Or drop `iter_origins` and directly use `storage.origin_list`, which does just that...? ardumont: or drop iter_origins and use directly storage.origin_list which does just that...? | |||||
Not Done Inline Actionsyeah, indeed vlorentz: yeah, indeed | |||||
Done Inline Actionsthat'd simplify even further D3681 either by:
ardumont: that'd simplify even further D3681 either by:
- dropping `iter_origins` altogether… | |||||
schedule_origin_batches(scheduler, type, origin_urls, origin_batch_size, kw) | schedule_origin_batches(scheduler, type, origin_urls, origin_batch_size, kw) | ||||
@task.command("list-pending") | @task.command("list-pending") | ||||
@click.argument("task-types", required=True, nargs=-1) | @click.argument("task-types", required=True, nargs=-1) | ||||
@click.option( | @click.option( | ||||
Done Inline Actions: Wrong order of args, btw. vlorentz: wrong order of args btw | |||||
Done Inline Actionsbecause my brain wanted to use islice all along ;) ardumont: because my brain wanted to use islice all along ;) | |||||
"--limit", | "--limit", | ||||
"-l", | "-l", | ||||
required=False, | required=False, | ||||
type=click.INT, | type=click.INT, | ||||
help="The maximum number of tasks to fetch", | help="The maximum number of tasks to fetch", | ||||
) | ) | ||||
@click.option( | @click.option( | ||||
"--before", | "--before", | ||||
▲ Show 20 Lines • Show All 356 Lines • Show Last 20 Lines |
why not use islice directly?