Page MenuHomeSoftware Heritage

D8940.id32226.diff
No OneTemporary

D8940.id32226.diff

diff --git a/swh/scheduler/cli/add_forge_now.py b/swh/scheduler/cli/add_forge_now.py
--- a/swh/scheduler/cli/add_forge_now.py
+++ b/swh/scheduler/cli/add_forge_now.py
@@ -19,50 +19,84 @@
@cli.group("add-forge-now")
+@click.option(
+ "-e", "--environment",
+ default="staging",
+ type=click.Choice(["production", "staging"]),
+ help="Determine the environment to use",
+)
@click.pass_context
-def add_forge_now(ctx):
+def add_forge_now(ctx, environment):
"""Manipulate listed origins."""
if not ctx.obj["scheduler"]:
raise ValueError("Scheduler class (local/remote) must be instantiated")
+ ctx.obj["environment"] = environment
+
@add_forge_now.command("register-lister")
-@click.argument("task_type_name", nargs=1, required=True)
+@click.argument("lister_name", nargs=1, required=True)
@click.argument("options", nargs=-1)
-@click.option(
- "--production/--staging",
- "enabled",
- is_flag=True,
- default=True,
- help="""Determine whether we want to scheduled enabled origins (on production) or
- disabled ones (on staging).""",
-)
@click.pass_context
def register_lister_cli(
ctx,
- task_type_name,
+ lister_name,
options,
- enabled,
):
+ """Register the lister tasks in the scheduler.
+
+ Depending on the environment, we'll want different policies:
+ - staging: only the "full" but limited listing (as "oneshot" task) of disabled origins
+ - production: both "full" and "incremental" (if that exists) listing (as "recurring"
+ task). The "full" will be triggered asap, the "incremental" will be triggered the
+ next day.
+
+ """
from .utils import parse_options, task_add
scheduler = ctx.obj["scheduler"]
+ environment = ctx.obj["environment"]
+
+ # Map the associated task types for the lister
+ task_type_names = {
+ lister_type: f"lister-{lister_name}-{lister_type}"
+ for lister_type in ['full', 'incremental']
+ }
+
+ task_types = {}
+ for type_name, task_type_name in task_type_names.items():
+ task_type = scheduler.get_task_type(task_type_name)
+ if task_type:
+ task_types[type_name] = task_type
+
+ if not task_types:
+ raise ValueError(f"Unknown lister type {lister_name}.")
(args, kw) = parse_options(options)
- if enabled: # production
+ # Recurring policy on production
+ if environment == "production":
policy = "recurring"
- else: # staging
+ else: # staging, "full" but limited listing as a oneshot
policy = "oneshot"
- kw.update({"max_pages": 1, "max_origins_per_page": 15, "enable_origins": False})
+ kw.update({"max_pages": 3, "max_origins_per_page": 10, "enable_origins": False})
+ # We only want the "full" listing in staging if both incremental and full exist
+ if "full" in task_types:
+ task_types.pop("incremental", None)
- task_add(
- scheduler,
- task_type_name=task_type_name,
- args=args,
- kw=kw,
- policy=policy,
- )
+ from datetime import timedelta
+ from swh.scheduler.utils import utcnow
+
+ for type_name, task_type in task_types.items():
+ next_run = utcnow() if type_name == "full" else utcnow() + timedelta(days=1)
+ task_add(
+ scheduler,
+ task_type_name=task_type["name"],
+ args=args,
+ kw=kw,
+ policy=policy,
+ next_run=next_run
+ )
@add_forge_now.command("schedule-first-visits")
@@ -74,14 +108,6 @@
type=str,
multiple=True,
)
-@click.option(
- "--production/--staging",
- "enabled",
- is_flag=True,
- default=True,
- help="""Determine whether we want to scheduled enabled origins (on production) or
- disabled ones (on staging).""",
-)
@click.option(
"--lister-name",
default=None,
@@ -96,7 +122,6 @@
def schedule_first_visits_cli(
ctx,
visit_type_names: List[str],
- enabled: bool,
lister_name: Optional[str] = None,
lister_instance_name: Optional[str] = None,
):
@@ -107,6 +132,7 @@
from .utils import get_task_type, send_to_celery
scheduler = ctx.obj["scheduler"]
+ environment = ctx.obj["environment"]
visit_type_to_queue: Dict[str, str] = {}
unknown_task_types = []
@@ -124,7 +150,7 @@
send_to_celery(
scheduler,
visit_type_to_queue=visit_type_to_queue,
- enabled=enabled,
+ enabled=environment=="production",
lister_name=lister_name,
lister_instance_name=lister_instance_name,
)
diff --git a/swh/scheduler/cli/task.py b/swh/scheduler/cli/task.py
--- a/swh/scheduler/cli/task.py
+++ b/swh/scheduler/cli/task.py
@@ -147,6 +147,10 @@
if not scheduler:
raise ValueError("Scheduler class (local/remote) must be instantiated")
+ task_type = scheduler.get_task_type(task_type_name)
+ if not task_type:
+ raise ValueError(f"Unknown task name {task_type_name}.")
+
(args, kw) = parse_options(options)
task_add(
scheduler,
diff --git a/swh/scheduler/cli/utils.py b/swh/scheduler/cli/utils.py
--- a/swh/scheduler/cli/utils.py
+++ b/swh/scheduler/cli/utils.py
@@ -310,11 +310,9 @@
priority: Optional[str] = None,
next_run: Optional[str] = None,
):
+ """Add a task task_type_name in the scheduler. """
from swh.scheduler.utils import utcnow
- if scheduler.get_task_type(task_type_name) is None:
- raise ValueError(f"Unknown task type {task_type_name}.")
-
task = {
"type": task_type_name,
"policy": policy,
diff --git a/swh/scheduler/tests/test_cli_add_forge_now.py b/swh/scheduler/tests/test_cli_add_forge_now.py
--- a/swh/scheduler/tests/test_cli_add_forge_now.py
+++ b/swh/scheduler/tests/test_cli_add_forge_now.py
@@ -41,7 +41,7 @@
[
[],
["--lister-name", "github", "--lister-instance-name", "github"],
- ["--staging"],
+ ["--environment=staging"],
],
)
def test_schedule_first_visits_cli(
@@ -57,7 +57,14 @@
visit_type = next(iter(listed_origins_by_type))
# enabled origins by default except when --staging flag is provided
- enabled = "--staging" not in extra_cmd_args
+ enabled = "--environment=production" in extra_cmd_args
+
+ # Environment command are for the main command, so massage a bit the flags
+ if any(arg.startswith("--environment") for arg in extra_cmd_args):
+ prefix_cmd_args = extra_cmd_args
+ extra_cmd_args = []
+ else:
+ prefix_cmd_args = []
for origins in listed_origins_by_type.values():
swh_scheduler.record_listed_origins(
@@ -72,7 +79,7 @@
send_task = mocker.patch.object(swh_scheduler_celery_app, "send_task")
send_task.return_value = None
- cmd_args = ["schedule-first-visits", "--type-name", visit_type] + extra_cmd_args
+ cmd_args = prefix_cmd_args + ["schedule-first-visits", "--type-name", visit_type] + extra_cmd_args
result = invoke(swh_scheduler, args=tuple(cmd_args))
assert result.exit_code == 0

File Metadata

Mime Type
text/plain
Expires
Fri, Jun 20, 5:26 PM (2 w, 6 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3232401

Event Timeline