Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/backend.py
Show First 20 Lines • Show All 336 Lines • ▼ Show 20 Lines | ) -> PaginatedListedOriginList: | ||||
return PaginatedListedOriginList(origins, page_token) | return PaginatedListedOriginList(origins, page_token) | ||||
@db_transaction() | @db_transaction() | ||||
def grab_next_visits( | def grab_next_visits( | ||||
self, | self, | ||||
visit_type: str, | visit_type: str, | ||||
count: int, | count: int, | ||||
policy: str, | policy: str, | ||||
enabled: bool = True, | |||||
lister_uuid: Optional[str] = None, | |||||
timestamp: Optional[datetime.datetime] = None, | timestamp: Optional[datetime.datetime] = None, | ||||
scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), | scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), | ||||
failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), | failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), | ||||
not_found_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), | not_found_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), | ||||
tablesample: Optional[float] = None, | tablesample: Optional[float] = None, | ||||
db=None, | db=None, | ||||
cur=None, | cur=None, | ||||
) -> List[ListedOrigin]: | ) -> List[ListedOrigin]: | ||||
if timestamp is None: | if timestamp is None: | ||||
timestamp = utcnow() | timestamp = utcnow() | ||||
origin_select_cols = ", ".join(ListedOrigin.select_columns()) | origin_select_cols = ", ".join(ListedOrigin.select_columns()) | ||||
query_args: List[Any] = [] | query_args: List[Any] = [] | ||||
where_clauses = [] | where_clauses = [] | ||||
# list of (name, query) handled as CTEs before the main query | # list of (name, query) handled as CTEs before the main query | ||||
common_table_expressions: List[Tuple[str, str]] = [] | common_table_expressions: List[Tuple[str, str]] = [] | ||||
# "NOT enabled" = the lister said the origin no longer exists | # "NOT enabled" = the lister said the origin no longer exists | ||||
where_clauses.append("enabled") | where_clauses.append("enabled" if enabled else "not enabled") | ||||
# Only schedule visits of the given type | # Only schedule visits of the given type | ||||
where_clauses.append("visit_type = %s") | where_clauses.append("visit_type = %s") | ||||
query_args.append(visit_type) | query_args.append(visit_type) | ||||
if scheduled_cooldown: | if scheduled_cooldown: | ||||
# Don't re-schedule visits if they're already scheduled but we haven't | # Don't re-schedule visits if they're already scheduled but we haven't | ||||
# recorded a result yet, unless they've been scheduled more than a week | # recorded a result yet, unless they've been scheduled more than a week | ||||
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | ) -> List[ListedOrigin]: | ||||
raise UnknownPolicy(f"Unknown scheduling policy {policy}") | raise UnknownPolicy(f"Unknown scheduling policy {policy}") | ||||
if tablesample: | if tablesample: | ||||
table = "listed_origins tablesample SYSTEM (%s)" | table = "listed_origins tablesample SYSTEM (%s)" | ||||
query_args.insert(0, tablesample) | query_args.insert(0, tablesample) | ||||
else: | else: | ||||
table = "listed_origins" | table = "listed_origins" | ||||
if lister_uuid: | |||||
where_clauses.append("lister_id = %s") | |||||
query_args.append(lister_uuid) | |||||
# fmt: off | # fmt: off | ||||
common_table_expressions.insert(0, ("selected_origins", f""" | common_table_expressions.insert(0, ("selected_origins", f""" | ||||
SELECT | SELECT | ||||
{origin_select_cols}, next_visit_queue_position | {origin_select_cols}, next_visit_queue_position | ||||
FROM | FROM | ||||
{table} | {table} | ||||
LEFT JOIN | LEFT JOIN | ||||
origin_visit_stats USING (url, visit_type) | origin_visit_stats USING (url, visit_type) | ||||
▲ Show 20 Lines • Show All 623 Lines • Show Last 20 Lines |