diff --git a/swh/scheduler/cli/add_forge_now.py b/swh/scheduler/cli/add_forge_now.py
--- a/swh/scheduler/cli/add_forge_now.py
+++ b/swh/scheduler/cli/add_forge_now.py
@@ -19,50 +19,89 @@
 
 
 @cli.group("add-forge-now")
+@click.option(
+    "-e",
+    "--environment",
+    default="production",
+    type=click.Choice(["production", "staging"]),
+    help="Determine the environment to use",
+)
 @click.pass_context
-def add_forge_now(ctx):
+def add_forge_now(ctx, environment):
     """Manipulate listed origins."""
     if not ctx.obj["scheduler"]:
         raise ValueError("Scheduler class (local/remote) must be instantiated")
 
+    ctx.obj["environment"] = environment
+
 
 @add_forge_now.command("register-lister")
-@click.argument("task_type_name", nargs=1, required=True)
+@click.argument("lister_name", nargs=1, required=True)
 @click.argument("options", nargs=-1)
-@click.option(
-    "--production/--staging",
-    "enabled",
-    is_flag=True,
-    default=True,
-    help="""Determine whether we want to scheduled enabled origins (on production) or
-    disabled ones (on staging).""",
-)
 @click.pass_context
 def register_lister_cli(
     ctx,
-    task_type_name,
+    lister_name,
     options,
-    enabled,
 ):
+    """Register the lister tasks in the scheduler.
+
+    Depending on the environment, we'll want different policies:
+
+    - staging: only the "full" but limited listing (as "oneshot" task) of disabled
+      origins
+    - production: both "full" and "incremental" (if that exists) listing (as
+      "recurring" task). The "full" will be triggered asap, the "incremental" will be
+      triggered the next day.
+
+    """
     from .utils import parse_options, task_add
 
     scheduler = ctx.obj["scheduler"]
+    environment = ctx.obj["environment"]
+
+    # Map the associated task types for the lister
+    task_type_names = {
+        lister_type: f"lister-{lister_name}-{lister_type}"
+        for lister_type in ["full", "incremental"]
+    }
+
+    task_types = {}
+    for type_name, task_type_name in task_type_names.items():
+        task_type = scheduler.get_task_type(task_type_name)
+        if task_type:
+            task_types[type_name] = task_type
+
+    if not task_types:
+        raise ValueError(f"Unknown lister type {lister_name}.")
 
     (args, kw) = parse_options(options)
 
-    if enabled:  # production
+    # Recurring policy on production
+    if environment == "production":
         policy = "recurring"
-    else:  # staging
+    else:  # staging, "full" but limited listing as a oneshot
         policy = "oneshot"
-        kw.update({"max_pages": 1, "max_origins_per_page": 15, "enable_origins": False})
+        kw.update({"max_pages": 3, "max_origins_per_page": 10, "enable_origins": False})
+        # On staging, only run the "full" listing when both types exist
+        if "full" in task_types:
+            task_types.pop("incremental", None)
 
-    task_add(
-        scheduler,
-        task_type_name=task_type_name,
-        args=args,
-        kw=kw,
-        policy=policy,
-    )
+    from datetime import timedelta
+
+    from swh.scheduler.utils import utcnow
+
+    for type_name in task_types:
+        # "full" runs right away; "incremental" is delayed by one day
+        next_run = utcnow() if type_name == "full" else utcnow() + timedelta(days=1)
+        task_add(
+            scheduler,
+            task_type_name=task_type_names[type_name],
+            args=args,
+            kw=kw,
+            policy=policy,
+            next_run=next_run,
+        )
 
 
 @add_forge_now.command("schedule-first-visits")
@@ -74,14 +113,6 @@
     type=str,
     multiple=True,
 )
-@click.option(
-    "--production/--staging",
-    "enabled",
-    is_flag=True,
-    default=True,
-    help="""Determine whether we want to scheduled enabled origins (on production) or
-    disabled ones (on staging).""",
-)
 @click.option(
     "--lister-name",
     default=None,
@@ -96,7 +127,6 @@
 def schedule_first_visits_cli(
     ctx,
     visit_type_names: List[str],
-    enabled: bool,
     lister_name: Optional[str] = None,
     lister_instance_name: Optional[str] = None,
 ):
@@ -107,6 +137,7 @@
     from .utils import get_task_type, send_to_celery
 
     scheduler = ctx.obj["scheduler"]
+    environment = ctx.obj["environment"]
 
     visit_type_to_queue: Dict[str, str] = {}
     unknown_task_types = []
@@ -124,7 +155,7 @@
     send_to_celery(
         scheduler,
         visit_type_to_queue=visit_type_to_queue,
-        enabled=enabled,
+        enabled=(environment == "production"),
         lister_name=lister_name,
         lister_instance_name=lister_instance_name,
     )
diff --git a/swh/scheduler/cli/task.py b/swh/scheduler/cli/task.py
--- a/swh/scheduler/cli/task.py
+++ b/swh/scheduler/cli/task.py
@@ -147,6 +147,9 @@
     if not scheduler:
         raise ValueError("Scheduler class (local/remote) must be instantiated")
 
+    if scheduler.get_task_type(task_type_name) is None:
+        raise ValueError(f"Unknown task type {task_type_name}.")
+
     (args, kw) = parse_options(options)
     task_add(
         scheduler,
diff --git a/swh/scheduler/cli/utils.py b/swh/scheduler/cli/utils.py
--- a/swh/scheduler/cli/utils.py
+++ b/swh/scheduler/cli/utils.py
@@ -310,11 +310,14 @@
     priority: Optional[str] = None,
     next_run: Optional[str] = None,
 ):
+    """Add a task of type task_type_name to the scheduler.
+
+    The task type is not checked here: callers must make sure beforehand that it
+    is registered in the scheduler.
+
+    """
     from swh.scheduler.utils import utcnow
 
-    if scheduler.get_task_type(task_type_name) is None:
-        raise ValueError(f"Unknown task type {task_type_name}.")
-
     task = {
         "type": task_type_name,
         "policy": policy,
diff --git a/swh/scheduler/tests/test_cli_add_forge_now.py b/swh/scheduler/tests/test_cli_add_forge_now.py
--- a/swh/scheduler/tests/test_cli_add_forge_now.py
+++ b/swh/scheduler/tests/test_cli_add_forge_now.py
@@ -41,7 +41,7 @@
     [
         [],
         ["--lister-name", "github", "--lister-instance-name", "github"],
-        ["--staging"],
+        ["--environment", "staging"],
     ],
 )
 def test_schedule_first_visits_cli(
@@ -57,7 +57,14 @@
     visit_type = next(iter(listed_origins_by_type))
 
-    # enabled origins by default except when --staging flag is provided
-    enabled = "--staging" not in extra_cmd_args
+    # enabled origins by default except when the staging environment is requested
+    enabled = "staging" not in extra_cmd_args
+
+    # The environment option belongs to the group, so pass it before the subcommand
+    if "--environment" in extra_cmd_args:
+        prefix_cmd_args = extra_cmd_args
+        extra_cmd_args = []
+    else:
+        prefix_cmd_args = []
 
     for origins in listed_origins_by_type.values():
         swh_scheduler.record_listed_origins(
@@ -72,7 +79,13 @@
     send_task = mocker.patch.object(swh_scheduler_celery_app, "send_task")
     send_task.return_value = None
 
-    cmd_args = ["schedule-first-visits", "--type-name", visit_type] + extra_cmd_args
+    cmd_args = [
+        *prefix_cmd_args,
+        "schedule-first-visits",
+        "--type-name",
+        visit_type,
+        *extra_cmd_args,
+    ]
     result = invoke(swh_scheduler, args=tuple(cmd_args))
     assert result.exit_code == 0
 