diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -377,6 +377,7 @@ enabled: bool = True, lister_uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, + absolute_cooldown: Optional[datetime.timedelta] = datetime.timedelta(hours=12), scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), not_found_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), @@ -403,6 +404,15 @@ where_clauses.append("visit_type = %s") query_args.append(visit_type) + if absolute_cooldown: + # Don't schedule visits if they've been scheduled since the absolute cooldown + where_clauses.append( + """origin_visit_stats.last_scheduled IS NULL + OR origin_visit_stats.last_scheduled < %s + """ + ) + query_args.append(timestamp - absolute_cooldown) + if scheduled_cooldown: # Don't re-schedule visits if they're already scheduled but we haven't # recorded a result yet, unless they've been scheduled more than a week diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -414,6 +414,7 @@ enabled: bool = True, lister_uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, + absolute_cooldown: Optional[datetime.timedelta] = datetime.timedelta(hours=12), scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), not_found_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31), @@ -435,8 +436,9 @@ lister_uuid: Determine the list of origins listed from the lister with uuid timestamp: the mocked timestamp at which we're recording that the visits are being scheduled (defaults to the current time) + absolute_cooldown: the minimal interval between two visits of the same origin scheduled_cooldown: the minimal interval before which we can schedule - the same origin again + the same origin again if it's not been visited failed_cooldown: the minimal interval before which we can reschedule a failed origin not_found_cooldown: the minimal interval before which we can reschedule a diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py --- a/swh/scheduler/tests/test_scheduler.py +++ b/swh/scheduler/tests/test_scheduler.py @@ -947,7 +947,9 @@ expected=expected, ) - @pytest.mark.parametrize("which_cooldown", ("scheduled", "failed", "not_found")) + @pytest.mark.parametrize( + "which_cooldown", ("scheduled", "failed", "not_found", "absolute") + ) @pytest.mark.parametrize("cooldown", (7, 15)) def test_grab_next_visits_cooldowns( self, @@ -966,14 +968,22 @@ expected=expected, ) - # Mark all the visits as scheduled, failed or notfound on the `after` timestamp + # Mark all the visits as scheduled, failed or not_found on the `after` timestamp. + # If we're testing the `absolute_cooldown`, mark the visit as successful. ovs_args = { "last_visit": None, "last_visit_status": None, "last_scheduled": None, + "last_successful": None, + "last_snapshot": None, } if which_cooldown == "scheduled": ovs_args["last_scheduled"] = after + elif which_cooldown == "absolute": + ovs_args["last_visit"] = after + ovs_args["last_successful"] = after + ovs_args["last_visit_status"] = LastVisitStatus.successful + ovs_args["last_snapshot"] = b"\x00" * 20 else: ovs_args["last_visit"] = after ovs_args["last_visit_status"] = LastVisitStatus(which_cooldown) @@ -982,8 +992,6 @@ OriginVisitStats( url=origin.url, visit_type=origin.visit_type, - last_snapshot=None, - last_successful=None, **ovs_args, ) for i, origin in enumerate(origins) @@ -995,6 +1003,7 @@ "scheduled_cooldown": None, "failed_cooldown": None, "not_found_cooldown": None, + "absolute_cooldown": None, } cooldown_args[f"{which_cooldown}_cooldown"] = cooldown_td