diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -316,22 +316,10 @@ count: int, policy: str, timestamp: Optional[datetime.datetime] = None, + excluded_listers: List[str] = [], db=None, cur=None, ) -> List[ListedOrigin]: - """Get at most the `count` next origins that need to be visited with - the `visit_type` loader according to the given scheduling `policy`. - - This will mark the origins as scheduled in the origin_visit_stats - table, to avoid scheduling multiple visits to the same origin. - - Arguments: - visit_type: type of visits to schedule - count: number of visits to schedule - policy: the scheduling policy used to select which visits to schedule - timestamp: the mocked timestamp at which we're recording that the visits are - being scheduled (defaults to the current time) - """ if timestamp is None: timestamp = utcnow() @@ -339,6 +327,7 @@ query_args: List[Any] = [] + join_clauses = [] where_clauses = [] # "NOT enabled" = the lister said the origin no longer exists @@ -403,6 +392,16 @@ else: raise UnknownPolicy(f"Unknown scheduling policy {policy}") + if excluded_listers: + join_clauses.append("INNER JOIN listers l on listed_origins.lister_id=l.id") + placeholders = ["%s"] * len(excluded_listers) + where_clauses.append( + f"""\ + l.name not in ({', '.join(placeholders)}) + """ + ) + query_args.extend(excluded_listers) + select_query = f""" SELECT {origin_select_cols} @@ -410,6 +409,7 @@ listed_origins LEFT JOIN origin_visit_stats USING (url, visit_type) + {" ".join(join_clauses)} WHERE ({") AND (".join(where_clauses)}) ORDER BY diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -389,6 +389,7 @@ count: int, policy: str, timestamp: Optional[datetime.datetime] = None, + excluded_listers: List[str] = [], ) -> List[ListedOrigin]: """Get at most the `count` next origins that need to be visited with the `visit_type` loader according to the given scheduling `policy`. @@ -402,6 +403,8 @@ policy: the scheduling policy used to select which visits to schedule timestamp: the mocked timestamp at which we're recording that the visits are being scheduled (defaults to the current time) + excluded_listers: List of lister names to exclude from the result listing of + visits """ ...