diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -318,6 +318,7 @@ timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), + notfound_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), db=None, cur=None, ) -> List[ListedOrigin]: @@ -364,6 +365,15 @@ query_args.append(timestamp) query_args.append(failed_cooldown) + if notfound_cooldown: + # Don't retry not found origins too often + where_clauses.append( + "origin_visit_stats.last_notfound is null " + "or origin_visit_stats.last_notfound < %s - %s" + ) + query_args.append(timestamp) + query_args.append(notfound_cooldown) + if policy == "oldest_scheduled_first": order_by = "origin_visit_stats.last_scheduled NULLS FIRST" elif policy == "never_visited_oldest_update_first": diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -391,6 +391,7 @@ timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), + notfound_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), ) -> List[ListedOrigin]: """Get at most the `count` next origins that need to be visited with the `visit_type` loader according to the given scheduling `policy`. @@ -408,6 +409,8 @@ the same origin again failed_cooldown: the minimal interval before which we can reschedule a failed origin + notfound_cooldown: the minimal interval before which we can reschedule a + notfound origin """ ... diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py --- a/swh/scheduler/tests/test_scheduler.py +++ b/swh/scheduler/tests/test_scheduler.py @@ -861,7 +861,7 @@ expected=expected, ) - @pytest.mark.parametrize("which_cooldown", ("scheduled", "failed")) + @pytest.mark.parametrize("which_cooldown", ("scheduled", "failed", "notfound")) @pytest.mark.parametrize("cooldown", (7, 15)) def test_grab_next_visits_cooldowns( self, swh_scheduler, listed_origins_by_type, which_cooldown, cooldown, @@ -878,7 +878,7 @@ # Mark all the visits as `{which_cooldown}` (scheduled, failed or notfound) on # the `after` timestamp - ovs_args = {"last_failed": None, "last_scheduled": None} + ovs_args = {"last_failed": None, "last_notfound": None, "last_scheduled": None} ovs_args[f"last_{which_cooldown}"] = after visit_stats = [ @@ -888,7 +888,6 @@ last_snapshot=None, last_eventful=None, last_uneventful=None, - last_notfound=None, **ovs_args, ) for i, origin in enumerate(origins) @@ -899,6 +898,7 @@ cooldown_args = { "scheduled_cooldown": None, "failed_cooldown": None, + "notfound_cooldown": None, } cooldown_args[f"{which_cooldown}_cooldown"] = cooldown_td