Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066553
D4912.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
D4912.id.diff
View Options
diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -348,8 +348,21 @@
where_clauses.append("visit_type = %s")
query_args.append(visit_type)
- # TODO: filter on last_scheduled "too recent" to avoid always
- # re-scheduling the same tasks.
+ # Don't re-schedule visits if they're already scheduled but we haven't
+ # recorded a result yet, unless they've been scheduled more than a week
+ # ago (it probably means we've lost them in flight somewhere).
+ where_clauses.append(
+ """origin_visit_stats.last_scheduled IS NULL
+ OR origin_visit_stats.last_scheduled < GREATEST(
+ %s - '7 day'::interval,
+ origin_visit_stats.last_eventful,
+ origin_visit_stats.last_uneventful,
+ origin_visit_stats.last_failed,
+ origin_visit_stats.last_notfound
+ )
+ """
+ )
+ query_args.append(timestamp)
if policy == "oldest_scheduled_first":
order_by = "origin_visit_stats.last_scheduled NULLS FIRST"
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -764,7 +764,7 @@
"""Calls grab_next_visits with the passed policy, and check that all
the origins returned are the expected ones (in the same order), and
that no extra origins are returned. Also checks the origin visits have
- been marked as scheduled."""
+ been marked as scheduled, and are only re-scheduled a week later"""
assert len(expected) != 0
before = utcnow()
@@ -785,6 +785,25 @@
# Check that last_scheduled got updated
assert before <= visit_stats.last_scheduled <= after
+ # They should not be scheduled again
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type, count=len(expected) + 1, policy=policy,
+ )
+ assert ret == [], "grab_next_visits returned already-scheduled origins"
+
+ # But a week later, they should
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type,
+ count=len(expected) + 1,
+ policy=policy,
+ timestamp=after + datetime.timedelta(days=7),
+ )
+ # We need to sort them because their 'last_scheduled' field is updated to
+ # exactly the same value, so the order is not deterministic
+ assert sorted(ret) == sorted(
+ expected
+ ), "grab_next_visits didn't reschedule visits after a week"
+
def test_grab_next_visits_oldest_scheduled_first(
self, swh_scheduler, listed_origins_by_type,
):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 3:02 PM (12 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225564
Attached To
D4912: grab_next_visits: don't re-schedule visits too fast
Event Timeline
Log In to Comment