diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,5 @@
 pytest
 pytest-mock
-celery >= 4.3
 hypothesis >= 3.11.0
 swh.lister
 swh.storage[testing]
diff --git a/swh/scheduler/cli/add_forge_now.py b/swh/scheduler/cli/add_forge_now.py
--- a/swh/scheduler/cli/add_forge_now.py
+++ b/swh/scheduler/cli/add_forge_now.py
@@ -19,12 +19,85 @@
 
 
 @cli.group("add-forge-now")
+@click.option(
+    "-e",
+    "--environment",
+    default="production",
+    type=click.Choice(["production", "staging"]),
+    help="Determine the environment to use",
+)
 @click.pass_context
-def add_forge_now(ctx):
+def add_forge_now(ctx, environment):
     """Manipulate listed origins."""
     if not ctx.obj["scheduler"]:
         raise ValueError("Scheduler class (local/remote) must be instantiated")
 
+    ctx.obj["environment"] = environment
+
+
+@add_forge_now.command("register-lister")
+@click.argument("lister_name", nargs=1, required=True)
+@click.argument("options", nargs=-1)
+@click.pass_context
+def register_lister_cli(
+    ctx,
+    lister_name,
+    options,
+):
+    """Register the lister tasks in the scheduler.
+
+    Depending on the environment, we'll want different policies:
+    - staging: only the "full" but limited listing (as "oneshot" task) of disabled origins
+    - production: both "full" and "incremental" (if that exists) listing (as "recurring"
+      task). The "full" will be triggered asap, the "incremental" will be triggered the
+      next day.
+
+    """
+    from datetime import timedelta
+
+    from swh.scheduler.utils import utcnow
+
+    from .utils import parse_options, task_add
+
+    scheduler = ctx.obj["scheduler"]
+    environment = ctx.obj["environment"]
+
+    # Map each listing type to its task type name, "list-<lister>-<listing type>"
+    listing_types = ("full", "incremental")
+    task_type_names = {t: f"list-{lister_name}-{t}" for t in listing_types}
+
+    task_types = {}
+    for type_name, task_type_name in task_type_names.items():
+        task_type = scheduler.get_task_type(task_type_name)
+        if task_type:
+            task_types[type_name] = task_type
+
+    if not task_types:
+        raise ValueError(f"Unknown lister type {lister_name}.")
+
+    (args, kw) = parse_options(options)
+
+    # Recurring policy on production
+    if environment == "production":
+        policy = "recurring"
+    else:  # staging, "full" but limited listing as a oneshot
+        policy = "oneshot"
+        kw.update({"max_pages": 3, "max_origins_per_page": 10, "enable_origins": False})
+        # We only want the "full" listing in staging if both incremental and full exist
+        if "full" in task_types:
+            task_types.pop("incremental", None)
+
+    for type_name in task_types:
+        next_run = utcnow() if type_name == "full" else utcnow() + timedelta(days=1)
+        task_add(
+            scheduler,
+            task_type_name=task_type_names[type_name],
+            args=args,
+            kw=kw,
+            policy=policy,
+            next_run=next_run,
+        )
+
 
 @add_forge_now.command("schedule-first-visits")
 @click.option(
@@ -35,14 +108,6 @@
     type=str,
     multiple=True,
 )
-@click.option(
-    "--production/--staging",
-    "enabled",
-    is_flag=True,
-    default=True,
-    help="""Determine whether we want to scheduled enabled origins (on production) or
-    disabled ones (on staging).""",
-)
 @click.option(
     "--lister-name",
     default=None,
@@ -57,7 +122,6 @@
 def schedule_first_visits_cli(
     ctx,
     visit_type_names: List[str],
-    enabled: bool,
     lister_name: Optional[str] = None,
     lister_instance_name: Optional[str] = None,
 ):
@@ -68,6 +132,7 @@
     from .utils import get_task_type, send_to_celery
 
     scheduler = ctx.obj["scheduler"]
+    environment = ctx.obj["environment"]
 
     visit_type_to_queue: Dict[str, str] = {}
     unknown_task_types = []
@@ -85,7 +150,7 @@
     send_to_celery(
         scheduler,
         visit_type_to_queue=visit_type_to_queue,
-        enabled=enabled,
+        enabled=environment == "production",
         lister_name=lister_name,
         lister_instance_name=lister_instance_name,
     )
diff --git a/swh/scheduler/cli/origin.py b/swh/scheduler/cli/origin.py
--- a/swh/scheduler/cli/origin.py
+++ b/swh/scheduler/cli/origin.py
@@ -122,7 +122,7 @@
     """Send the next COUNT origin visits of the TYPE loader to the scheduler as
     one-shot tasks."""
     from ..utils import utcnow
-    from .task import pretty_print_task
+    from .utils import pretty_print_task
 
     scheduler = ctx.obj["scheduler"]
 
diff --git a/swh/scheduler/cli/task.py b/swh/scheduler/cli/task.py
--- a/swh/scheduler/cli/task.py
+++ b/swh/scheduler/cli/task.py
@@ -27,128 +27,6 @@
 DATETIME = click.DateTime()
 
 
-def format_dict(d):
-    """Recursively format date objects in the dict passed as argument"""
-    import datetime
-
-    ret = {}
-    for k, v in d.items():
-        if isinstance(v, (datetime.date, datetime.datetime)):
-            v = v.isoformat()
-        elif isinstance(v, dict):
-            v = format_dict(v)
-        ret[k] = v
-    return ret
-
-
-def pretty_print_list(list, indent=0):
-    """Pretty-print a list"""
-    return "".join("%s%r\n" % (" " * indent, item) for item in list)
-
-
-def pretty_print_dict(dict, indent=0):
-    """Pretty-print a list"""
-    return "".join(
-        "%s%s: %r\n" % (" " * indent, click.style(key, bold=True), value)
-        for key, value in sorted(dict.items())
-    )
-
-
-def pretty_print_run(run, indent=4):
-    fmt = (
-        "{indent}{backend_id} [{status}]\n"
-        "{indent}  scheduled: {scheduled} [{started}:{ended}]"
-    )
-    return fmt.format(indent=" " * indent, **format_dict(run))
-
-
-def pretty_print_task(task, full=False):
-    """Pretty-print a task
-
-    If 'full' is True, also print the status and priority fields.
-
-    >>> import datetime
-    >>> task = {
-    ...     'id': 1234,
-    ...     'arguments': {
-    ...         'args': ['foo', 'bar', True],
-    ...         'kwargs': {'key': 'value', 'key2': 42},
-    ...     },
-    ...     'current_interval': datetime.timedelta(hours=1),
-    ...     'next_run': datetime.datetime(2019, 2, 21, 13, 52, 35, 407818),
-    ...     'policy': 'oneshot',
-    ...     'priority': None,
-    ...     'status': 'next_run_not_scheduled',
-    ...     'type': 'test_task',
-    ... }
-    >>> print(click.unstyle(pretty_print_task(task)))
-    Task 1234
-      Next run: ... (2019-02-21T13:52:35.407818)
-      Interval: 1:00:00
-      Type: test_task
-      Policy: oneshot
-      Args:
-        'foo'
-        'bar'
-        True
-      Keyword args:
-        key: 'value'
-        key2: 42
-
-    >>> print(click.unstyle(pretty_print_task(task, full=True)))
-    Task 1234
-      Next run: ... (2019-02-21T13:52:35.407818)
-      Interval: 1:00:00
-      Type: test_task
-      Policy: oneshot
-      Status: next_run_not_scheduled
-      Priority:\x20
-      Args:
-        'foo'
-        'bar'
-        True
-      Keyword args:
-        key: 'value'
-        key2: 42
-
-    """
-    import humanize
-
-    next_run = task["next_run"]
-    lines = [
-        "%s %s\n" % (click.style("Task", bold=True), task["id"]),
-        click.style("  Next run: ", bold=True),
-        "%s (%s)" % (humanize.naturaldate(next_run), next_run.isoformat()),
-        "\n",
-        click.style("  Interval: ", bold=True),
-        str(task["current_interval"]),
-        "\n",
-        click.style("  Type: ", bold=True),
-        task["type"] or "",
-        "\n",
-        click.style("  Policy: ", bold=True),
-        task["policy"] or "",
-        "\n",
-    ]
-    if full:
-        lines += [
-            click.style("  Status: ", bold=True),
-            task["status"] or "",
-            "\n",
-            click.style("  Priority: ", bold=True),
-            task["priority"] or "",
-            "\n",
-        ]
-    lines += [
-        click.style("  Args:\n", bold=True),
-        pretty_print_list(task["arguments"]["args"], indent=4),
-        click.style("  Keyword args:\n", bold=True),
-        pretty_print_dict(task["arguments"]["kwargs"], indent=4),
-    ]
-
-    return "".join(lines)
-
-
 @cli.group("task")
 @click.pass_context
 def task(ctx):
@@ -202,6 +80,8 @@
 
     from swh.scheduler.utils import utcnow
 
+    from .utils import pretty_print_task
+
     tasks = []
     now = utcnow()
     scheduler = ctx.obj["scheduler"]
@@ -261,39 +141,26 @@
     which is considered as the lowest priority level.
 
     """
-    from swh.scheduler.utils import utcnow
-
-    from .utils import parse_options
+    from .utils import parse_options, task_add
 
     scheduler = ctx.obj["scheduler"]
     if not scheduler:
         raise ValueError("Scheduler class (local/remote) must be instantiated")
 
-    if scheduler.get_task_type(task_type_name) is None:
-        raise ValueError(f"Unknown task type {task_type_name}.")
-
-    now = utcnow()
+    task_type = scheduler.get_task_type(task_type_name)
+    if not task_type:
+        raise ValueError(f"Unknown task name {task_type_name}.")
 
     (args, kw) = parse_options(options)
-    task = {
-        "type": task_type_name,
-        "policy": policy,
-        "priority": priority,
-        "arguments": {
-            "args": args,
-            "kwargs": kw,
-        },
-        "next_run": next_run or now,
-    }
-    created = scheduler.create_tasks([task])
-
-    output = [
-        "Created %d tasks\n" % len(created),
-    ]
-    for task in created:
-        output.append(pretty_print_task(task))
-
-    click.echo("\n".join(output))
+    task_add(
+        scheduler,
+        task_type_name=task_type_name,
+        policy=policy,
+        priority=priority,
+        next_run=next_run,
+        args=args,
+        kw=kw,
+    )
 
 
 def iter_origins(  # use string annotations to prevent some pkg loading
@@ -412,6 +279,8 @@
     You can override the number of tasks to fetch with the --limit flag.
 
     """
+    from .utils import pretty_print_task
+
     scheduler = ctx.obj["scheduler"]
     if not scheduler:
         raise ValueError("Scheduler class (local/remote) must be instantiated")
@@ -511,6 +380,8 @@
     """List tasks."""
     from operator import itemgetter
 
+    from .utils import pretty_print_run, pretty_print_task
+
     scheduler = ctx.obj["scheduler"]
     if not scheduler:
         raise ValueError("Scheduler class (local/remote) must be instantiated")
diff --git a/swh/scheduler/cli/utils.py b/swh/scheduler/cli/utils.py
--- a/swh/scheduler/cli/utils.py
+++ b/swh/scheduler/cli/utils.py
@@ -13,7 +13,7 @@
 import click
 
 if TYPE_CHECKING:
-    from typing import Dict, Optional
+    from typing import Dict, List, Optional, Tuple
 
     from swh.scheduler.interface import SchedulerInterface
 
@@ -73,7 +73,7 @@
         raise click.ClickException("Invalid argument: {}".format(option))
 
 
-def parse_options(options):
+def parse_options(options: List[str]) -> Tuple[List[str], Dict]:
     """Parses options from a CLI as YAML and turns it into Python
     args and kwargs.
 
@@ -177,3 +177,161 @@
                 kwargs=task_dict["arguments"]["kwargs"],
                 queue=queue_name,
             )
+
+
+def pretty_print_list(list, indent=0):
+    """Pretty-print a list"""
+    return "".join("%s%r\n" % (" " * indent, item) for item in list)
+
+
+def pretty_print_dict(dict, indent=0):
+    """Pretty-print a dict, one "key: value" line per entry, sorted by key"""
+    return "".join(
+        "%s%s: %r\n" % (" " * indent, click.style(key, bold=True), value)
+        for key, value in sorted(dict.items())
+    )
+
+
+def format_dict(d):
+    """Recursively format date objects in the dict passed as argument"""
+    import datetime
+
+    ret = {}
+    for k, v in d.items():
+        if isinstance(v, (datetime.date, datetime.datetime)):
+            v = v.isoformat()
+        elif isinstance(v, dict):
+            v = format_dict(v)
+        ret[k] = v
+    return ret
+
+
+def pretty_print_run(run, indent=4):
+    fmt = (
+        "{indent}{backend_id} [{status}]\n"
+        "{indent}  scheduled: {scheduled} [{started}:{ended}]"
+    )
+    return fmt.format(indent=" " * indent, **format_dict(run))
+
+
+def pretty_print_task(task, full=False):
+    """Pretty-print a task
+
+    If 'full' is True, also print the status and priority fields.
+
+    >>> import datetime
+    >>> task = {
+    ...     'id': 1234,
+    ...     'arguments': {
+    ...         'args': ['foo', 'bar', True],
+    ...         'kwargs': {'key': 'value', 'key2': 42},
+    ...     },
+    ...     'current_interval': datetime.timedelta(hours=1),
+    ...     'next_run': datetime.datetime(2019, 2, 21, 13, 52, 35, 407818),
+    ...     'policy': 'oneshot',
+    ...     'priority': None,
+    ...     'status': 'next_run_not_scheduled',
+    ...     'type': 'test_task',
+    ... }
+    >>> print(click.unstyle(pretty_print_task(task)))
+    Task 1234
+      Next run: ... (2019-02-21T13:52:35.407818)
+      Interval: 1:00:00
+      Type: test_task
+      Policy: oneshot
+      Args:
+        'foo'
+        'bar'
+        True
+      Keyword args:
+        key: 'value'
+        key2: 42
+
+    >>> print(click.unstyle(pretty_print_task(task, full=True)))
+    Task 1234
+      Next run: ... (2019-02-21T13:52:35.407818)
+      Interval: 1:00:00
+      Type: test_task
+      Policy: oneshot
+      Status: next_run_not_scheduled
+      Priority:\x20
+      Args:
+        'foo'
+        'bar'
+        True
+      Keyword args:
+        key: 'value'
+        key2: 42
+
+    """
+    import humanize
+
+    next_run = task["next_run"]
+    lines = [
+        "%s %s\n" % (click.style("Task", bold=True), task["id"]),
+        click.style("  Next run: ", bold=True),
+        "%s (%s)" % (humanize.naturaldate(next_run), next_run.isoformat()),
+        "\n",
+        click.style("  Interval: ", bold=True),
+        str(task["current_interval"]),
+        "\n",
+        click.style("  Type: ", bold=True),
+        task["type"] or "",
+        "\n",
+        click.style("  Policy: ", bold=True),
+        task["policy"] or "",
+        "\n",
+    ]
+    if full:
+        lines += [
+            click.style("  Status: ", bold=True),
+            task["status"] or "",
+            "\n",
+            click.style("  Priority: ", bold=True),
+            task["priority"] or "",
+            "\n",
+        ]
+    lines += [
+        click.style("  Args:\n", bold=True),
+        pretty_print_list(task["arguments"]["args"], indent=4),
+        click.style("  Keyword args:\n", bold=True),
+        pretty_print_dict(task["arguments"]["kwargs"], indent=4),
+    ]
+
+    return "".join(lines)
+
+
+def task_add(
+    scheduler: SchedulerInterface,
+    task_type_name: str,
+    args: List[str],
+    kw: Dict,
+    policy: str,
+    priority: Optional[str] = None,
+    next_run: Optional[str] = None,
+):
+    """Create a task of type task_type_name in the scheduler and echo the result.
+
+    When next_run is not provided, the task's next run is set to utcnow().
+
+    """
+    from swh.scheduler.utils import utcnow
+
+    task = {
+        "type": task_type_name,
+        "policy": policy,
+        "priority": priority,
+        "arguments": {
+            "args": args,
+            "kwargs": kw,
+        },
+        "next_run": next_run or utcnow(),
+    }
+    created = scheduler.create_tasks([task])
+
+    output = [f"Created {len(created)} tasks\n"]
+    # Use a dedicated name so the task definition above is not shadowed
+    for created_task in created:
+        output.append(pretty_print_task(created_task))
+
+    click.echo("\n".join(output))
diff --git a/swh/scheduler/tests/test_cli_add_forge_now.py b/swh/scheduler/tests/test_cli_add_forge_now.py
--- a/swh/scheduler/tests/test_cli_add_forge_now.py
+++ b/swh/scheduler/tests/test_cli_add_forge_now.py
@@ -3,6 +3,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import re
 from typing import Tuple
 
 import attr
@@ -40,7 +41,7 @@
     [
         [],
         ["--lister-name", "github", "--lister-instance-name", "github"],
-        ["--staging"],
+        ["--environment=staging"],
     ],
 )
 def test_schedule_first_visits_cli(
@@ -56,7 +57,14 @@
     visit_type = next(iter(listed_origins_by_type))
 
     # enabled origins by default except when --staging flag is provided
-    enabled = "--staging" not in extra_cmd_args
+    enabled = "--environment=staging" not in extra_cmd_args
+
+    # The environment option belongs to the main command: move it before the subcommand
+    if any(arg.startswith("--environment") for arg in extra_cmd_args):
+        prefix_cmd_args = extra_cmd_args
+        extra_cmd_args = []
+    else:
+        prefix_cmd_args = []
 
     for origins in listed_origins_by_type.values():
         swh_scheduler.record_listed_origins(
@@ -71,7 +79,11 @@
     send_task = mocker.patch.object(swh_scheduler_celery_app, "send_task")
     send_task.return_value = None
 
-    cmd_args = ["schedule-first-visits", "--type-name", visit_type] + extra_cmd_args
+    cmd_args = (
+        prefix_cmd_args
+        + ["schedule-first-visits", "--type-name", visit_type]
+        + extra_cmd_args
+    )
     result = invoke(swh_scheduler, args=tuple(cmd_args))
 
     assert result.exit_code == 0
@@ -86,3 +98,45 @@
     }
 
     assert expected_tasks == scheduled_tasks
+
+
+def test_schedule_register_lister(swh_scheduler):
+    result = invoke(
+        swh_scheduler,
+        [
+            "register-lister",
+            "test",
+            "arg1",
+            "arg2",
+            "key=value",
+        ],
+    )
+    expected = r"""
+Created 1 tasks
+
+Task 1
+  Next run: today \(.*\)
+  Interval: 1 day, 0:00:00
+  Type: swh-test-ping
+  Policy: recurring
+  Args:
+    'arg1'
+    'arg2'
+  Keyword args:
+    key: 'value'
+
+""".lstrip()
+    assert result.exit_code == 0, result.output
+    assert re.fullmatch(expected, result.output, re.MULTILINE), result.output
+
+
+def test_register_lister_unknown_task_type(swh_scheduler):
+    """When scheduling unknown task type, the cli should raise."""
+    with pytest.raises(ValueError, match="Unknown"):
+        invoke(
+            swh_scheduler,
+            [
+                "register-lister",
+                "unknown-lister-type-should-raise",
+            ],
+        )