diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -342,6 +342,8 @@ visit_type: str, count: int, policy: str, + enabled: bool = True, + lister_uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), @@ -363,7 +365,7 @@ common_table_expressions: List[Tuple[str, str]] = [] # "NOT enabled" = the lister said the origin no longer exists - where_clauses.append("enabled") + where_clauses.append("enabled" if enabled else "not enabled") # Only schedule visits of the given type where_clauses.append("visit_type = %s") @@ -466,6 +468,10 @@ else: table = "listed_origins" + if lister_uuid: + where_clauses.append("lister_id = %s") + query_args.append(lister_uuid) + # fmt: off common_table_expressions.insert(0, ("selected_origins", f""" SELECT diff --git a/swh/scheduler/cli/origin.py b/swh/scheduler/cli/origin.py --- a/swh/scheduler/cli/origin.py +++ b/swh/scheduler/cli/origin.py @@ -155,10 +155,30 @@ @click.option( "--tablesample", help="Table sampling percentage", type=float, ) +@click.option( + "--only-enabled/--only-disabled", + "enabled", + is_flag=True, + default=True, + help="""Determine whether we want to scheduled enabled or disabled origins. As default, we + want to reasonably deal with enabled origins. 
For some edge case though, we + might want the disabled ones.""", +) +@click.option( + "--lister-uuid", + default=None, + help="Limit origins to those listed from such lister", +) @click.argument("type", type=str) @click.pass_context def send_to_celery( - ctx, policy: str, queue: Optional[str], tablesample: Optional[float], type: str + ctx, + policy: str, + queue: Optional[str], + tablesample: Optional[float], + type: str, + enabled: bool, + lister_uuid: Optional[str] = None, ): """Send the next origin visits of the TYPE loader to celery, filling the queue.""" from kombu.utils.uuid import uuid @@ -176,7 +196,12 @@ click.echo(f"{num_tasks} slots available in celery queue") origins = scheduler.grab_next_visits( - type, num_tasks, policy=policy, tablesample=tablesample + type, + num_tasks, + policy=policy, + tablesample=tablesample, + enabled=enabled, + lister_uuid=lister_uuid, ) click.echo(f"{len(origins)} visits to send to celery") diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -394,6 +394,8 @@ visit_type: str, count: int, policy: str, + enabled: bool = True, + lister_uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), @@ -410,6 +412,10 @@ visit_type: type of visits to schedule count: number of visits to schedule policy: the scheduling policy used to select which visits to schedule + enabled: whether to select enabled origins (True, the default) or disabled + ones, i.e. origins the lister reported as no longer existing; selecting + disabled origins is only useful in edge cases.
+ lister_uuid: if set, only select origins listed by the lister with this uuid timestamp: the mocked timestamp at which we're recording that the visits are being scheduled (defaults to the current time) scheduled_cooldown: the minimal interval before which we can schedule @@ -420,6 +426,7 @@ not_found origin tablesample: the percentage of the table on which we run the query (None: no sampling) + """ ...