diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -333,6 +333,7 @@ timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), + notfound_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), db=None, cur=None, ) -> List[ListedOrigin]: @@ -379,6 +380,15 @@ query_args.append(timestamp) query_args.append(failed_cooldown) + if notfound_cooldown: + # Don't retry not found origins too often + where_clauses.append( + "origin_visit_stats.last_notfound is null " + "or origin_visit_stats.last_notfound < %s - %s" + ) + query_args.append(timestamp) + query_args.append(notfound_cooldown) + if policy == "oldest_scheduled_first": order_by = "origin_visit_stats.last_scheduled NULLS FIRST" elif policy == "never_visited_oldest_update_first": diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -397,6 +397,7 @@ timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), + notfound_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), ) -> List[ListedOrigin]: """Get at most the `count` next origins that need to be visited with the `visit_type` loader according to the given scheduling `policy`. @@ -414,6 +415,8 @@ the same origin again failed_cooldown: the minimal interval before which we can reschedule a failed origin + notfound_cooldown: the minimal interval before which we can reschedule a + notfound origin """ ... diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py --- a/swh/scheduler/tests/test_scheduler.py +++ b/swh/scheduler/tests/test_scheduler.py @@ -870,7 +870,7 @@ expected=expected, ) - @pytest.mark.parametrize("which_cooldown", ("scheduled", "failed")) + @pytest.mark.parametrize("which_cooldown", ("scheduled", "failed", "notfound")) @pytest.mark.parametrize("cooldown", (7, 15)) def test_grab_next_visits_cooldowns( self, swh_scheduler, listed_origins_by_type, which_cooldown, cooldown, @@ -887,7 +887,7 @@ # Mark all the visits as `{which_cooldown}` (scheduled, failed or notfound) on # the `after` timestamp - ovs_args = {"last_failed": None, "last_scheduled": None} + ovs_args = {"last_failed": None, "last_notfound": None, "last_scheduled": None} ovs_args[f"last_{which_cooldown}"] = after visit_stats = [ @@ -897,7 +897,6 @@ last_snapshot=None, last_eventful=None, last_uneventful=None, - last_notfound=None, **ovs_args, ) for i, origin in enumerate(origins) @@ -908,6 +907,7 @@ cooldown_args = { "scheduled_cooldown": None, "failed_cooldown": None, + "notfound_cooldown": None, } cooldown_args[f"{which_cooldown}_cooldown"] = cooldown_td