Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/cli.py
# Copyright (C) 2016-2018 The Software Heritage developers | # Copyright (C) 2016-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import arrow | import arrow | ||||
import click | import click | ||||
import csv | import csv | ||||
import itertools | import itertools | ||||
import json | import json | ||||
import locale | import locale | ||||
import logging | import logging | ||||
import time | import time | ||||
import datetime | import datetime | ||||
from swh.core import utils, config | from swh.core import utils, config | ||||
from swh.storage import get_storage | |||||
from swh.storage.algos.origin import iter_origins | |||||
from . import compute_nb_tasks_from | from . import compute_nb_tasks_from | ||||
from .backend_es import SWHElasticSearchClient | from .backend_es import SWHElasticSearchClient | ||||
from . import get_scheduler, DEFAULT_CONFIG | from . import get_scheduler, DEFAULT_CONFIG | ||||
from .cli_utils import parse_options | from .cli_utils import parse_options, schedule_origin_batches | ||||
locale.setlocale(locale.LC_ALL, '') | locale.setlocale(locale.LC_ALL, '') | ||||
ARROW_LOCALE = locale.getlocale(locale.LC_TIME)[0] | ARROW_LOCALE = locale.getlocale(locale.LC_TIME)[0] | ||||
class DateTimeType(click.ParamType): | class DateTimeType(click.ParamType): | ||||
name = 'time and date' | name = 'time and date' | ||||
▲ Show 20 Lines • Show All 254 Lines • ▼ Show 20 Lines | @click.option('--policy', '-p', default='recurring', | ||||
type=click.Choice(['recurring', 'oneshot'])) | type=click.Choice(['recurring', 'oneshot'])) | ||||
@click.option('--priority', '-P', default=None, | @click.option('--priority', '-P', default=None, | ||||
type=click.Choice(['low', 'normal', 'high'])) | type=click.Choice(['low', 'normal', 'high'])) | ||||
@click.option('--next-run', '-n', default=None) | @click.option('--next-run', '-n', default=None) | ||||
@click.pass_context | @click.pass_context | ||||
def schedule_task(ctx, type, options, policy, priority, next_run): | def schedule_task(ctx, type, options, policy, priority, next_run): | ||||
"""Schedule one task from arguments. | """Schedule one task from arguments. | ||||
The first argument is the name of the task type, further ones are | |||||
positional and keyword argument(s) of the task, in YAML format. | |||||
Keyword args are of the form key=value. | |||||
Usage sample: | Usage sample: | ||||
swh-scheduler --database 'service=swh-scheduler' \ | swh-scheduler --database 'service=swh-scheduler' \ | ||||
task add swh-lister-pypi | task add swh-lister-pypi | ||||
swh-scheduler --database 'service=swh-scheduler' \ | swh-scheduler --database 'service=swh-scheduler' \ | ||||
task add swh-lister-debian --policy=oneshot distribution=stretch | task add swh-lister-debian --policy=oneshot distribution=stretch | ||||
Show All 23 Lines | output = [ | ||||
'Created %d tasks\n' % len(created), | 'Created %d tasks\n' % len(created), | ||||
] | ] | ||||
for task in created: | for task in created: | ||||
output.append(pretty_print_task(task)) | output.append(pretty_print_task(task)) | ||||
click.echo('\n'.join(output)) | click.echo('\n'.join(output)) | ||||
@task.command('schedule_origins') | |||||
@click.argument('type', nargs=1, required=True) | |||||
@click.argument('options', nargs=-1) | |||||
@click.option('--batch-size', '-b', 'origin_batch_size', | |||||
default=10, show_default=True, type=int, | |||||
help="Number of origins per task") | |||||
@click.option('--min-id', | |||||
default=0, show_default=True, type=int, | |||||
help="Only schedule tasks for origins whose ID is greater") | |||||
@click.option('--max-id', | |||||
default=None, type=int, | |||||
help="Only schedule tasks for origins whose ID is lower") | |||||
@click.option('--storage-url', '-g', | |||||
help="URL of the (graph) storage API") | |||||
douardda: nitpicking: the --no-dry-run is useless, IMHO. But it's already like this in other subcommands… | |||||
@click.option('--dry-run/--no-dry-run', is_flag=True, | |||||
default=False, | |||||
help='List only what would be scheduled.') | |||||
@click.pass_context | |||||
def schedule_origin_metadata_index( | |||||
ctx, type, options, storage_url, origin_batch_size, | |||||
min_id, max_id, dry_run): | |||||
Done Inline ActionsPlease add a bit more doc in there. What value is expected for type? What are options arguments for? Maybe an example might be handy? douardda: Please add a bit more doc in there. What value is expected for `type`? What are `options`… | |||||
Not Done Inline Actionsknown? douardda: known? | |||||
"""Schedules tasks for origins that are already known. | |||||
The first argument is the name of the task type, further ones are | |||||
keyword argument(s) of the task in the form key=value, where value is | |||||
in YAML format. | |||||
Usage sample: | |||||
swh-scheduler --database 'service=swh-scheduler' \ | |||||
task schedule_origins indexer_origin_metadata | |||||
""" | |||||
scheduler = ctx.obj['scheduler'] | |||||
storage = get_storage('remote', {'url': storage_url}) | |||||
if dry_run: | |||||
scheduler = None | |||||
(args, kw) = parse_options(options) | |||||
if args: | |||||
raise click.ClickException('Only keywords arguments are allowed.') | |||||
origins = iter_origins(storage, origin_from=min_id, origin_to=max_id) | |||||
origin_ids = (origin['id'] for origin in origins) | |||||
schedule_origin_batches( | |||||
scheduler, type, origin_ids, origin_batch_size, kw) | |||||
@task.command('list-pending') | @task.command('list-pending') | ||||
@click.argument('task-types', required=True, nargs=-1) | @click.argument('task-types', required=True, nargs=-1) | ||||
@click.option('--limit', '-l', required=False, type=click.INT, | @click.option('--limit', '-l', required=False, type=click.INT, | ||||
help='The maximum number of tasks to fetch') | help='The maximum number of tasks to fetch') | ||||
@click.option('--before', '-b', required=False, type=DATETIME, | @click.option('--before', '-b', required=False, type=DATETIME, | ||||
help='List all jobs supposed to run before the given date') | help='List all jobs supposed to run before the given date') | ||||
@click.pass_context | @click.pass_context | ||||
def list_pending_tasks(ctx, task_types, limit, before): | def list_pending_tasks(ctx, task_types, limit, before): | ||||
▲ Show 20 Lines • Show All 416 Lines • Show Last 20 Lines |
nitpicking: the --no-dry-run is useless, IMHO. But it's already like this in other subcommands, so meh.