Page MenuHomeSoftware Heritage

D8770.id31630.diff
No OneTemporary

D8770.id31630.diff

diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -377,6 +377,7 @@
enabled: bool = True,
lister_uuid: Optional[str] = None,
timestamp: Optional[datetime.datetime] = None,
+ absolute_cooldown: Optional[datetime.timedelta] = datetime.timedelta(hours=12),
scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14),
not_found_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31),
@@ -403,6 +404,15 @@
where_clauses.append("visit_type = %s")
query_args.append(visit_type)
+ if absolute_cooldown:
+ # Skip origins that were last scheduled within the absolute cooldown window
+ where_clauses.append(
+ """origin_visit_stats.last_scheduled IS NULL
+ OR origin_visit_stats.last_scheduled < %s
+ """
+ )
+ query_args.append(timestamp - absolute_cooldown)
+
if scheduled_cooldown:
# Don't re-schedule visits if they're already scheduled but we haven't
# recorded a result yet, unless they've been scheduled more than a week
diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py
--- a/swh/scheduler/interface.py
+++ b/swh/scheduler/interface.py
@@ -414,6 +414,7 @@
enabled: bool = True,
lister_uuid: Optional[str] = None,
timestamp: Optional[datetime.datetime] = None,
+ absolute_cooldown: Optional[datetime.timedelta] = datetime.timedelta(hours=12),
scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14),
not_found_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=31),
@@ -435,8 +436,9 @@
lister_uuid: Determine the list of origins listed from the lister with uuid
timestamp: the mocked timestamp at which we're recording that the visits are
being scheduled (defaults to the current time)
+ absolute_cooldown: the minimal interval between two schedulings of an origin
scheduled_cooldown: the minimal interval before which we can schedule
- the same origin again
+ the same origin again if it has not been visited since
failed_cooldown: the minimal interval before which we can reschedule a
failed origin
not_found_cooldown: the minimal interval before which we can reschedule a
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -947,7 +947,9 @@
expected=expected,
)
- @pytest.mark.parametrize("which_cooldown", ("scheduled", "failed", "not_found"))
+ @pytest.mark.parametrize(
+ "which_cooldown", ("scheduled", "failed", "not_found", "absolute")
+ )
@pytest.mark.parametrize("cooldown", (7, 15))
def test_grab_next_visits_cooldowns(
self,
@@ -966,14 +968,22 @@
expected=expected,
)
- # Mark all the visits as scheduled, failed or notfound on the `after` timestamp
+ # Mark all the visits as scheduled, failed or not_found on the `after` timestamp.
+ # If we're testing the `absolute_cooldown`, mark the visit as successful.
ovs_args = {
"last_visit": None,
"last_visit_status": None,
"last_scheduled": None,
+ "last_successful": None,
+ "last_snapshot": None,
}
if which_cooldown == "scheduled":
ovs_args["last_scheduled"] = after
+ elif which_cooldown == "absolute":
+ ovs_args["last_visit"] = after
+ ovs_args["last_successful"] = after
+ ovs_args["last_visit_status"] = LastVisitStatus.successful
+ ovs_args["last_snapshot"] = b"\x00" * 20
else:
ovs_args["last_visit"] = after
ovs_args["last_visit_status"] = LastVisitStatus(which_cooldown)
@@ -982,8 +992,6 @@
OriginVisitStats(
url=origin.url,
visit_type=origin.visit_type,
- last_snapshot=None,
- last_successful=None,
**ovs_args,
)
for i, origin in enumerate(origins)
@@ -995,6 +1003,7 @@
"scheduled_cooldown": None,
"failed_cooldown": None,
"not_found_cooldown": None,
+ "absolute_cooldown": None,
}
cooldown_args[f"{which_cooldown}_cooldown"] = cooldown_td

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 9:35 AM (15 h, 16 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226267

Event Timeline