Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9312385
D5901.id21179.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D5901.id21179.diff
View Options
diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -316,6 +316,7 @@
count: int,
policy: str,
timestamp: Optional[datetime.datetime] = None,
+ scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
db=None,
cur=None,
) -> List[ListedOrigin]:
@@ -335,21 +336,23 @@
where_clauses.append("visit_type = %s")
query_args.append(visit_type)
- # Don't re-schedule visits if they're already scheduled but we haven't
- # recorded a result yet, unless they've been scheduled more than a week
- # ago (it probably means we've lost them in flight somewhere).
- where_clauses.append(
- """origin_visit_stats.last_scheduled IS NULL
- OR origin_visit_stats.last_scheduled < GREATEST(
- %s - '7 day'::interval,
- origin_visit_stats.last_eventful,
- origin_visit_stats.last_uneventful,
- origin_visit_stats.last_failed,
- origin_visit_stats.last_notfound
+ if scheduled_cooldown:
+ # Don't re-schedule visits if they're already scheduled but we haven't
+ # recorded a result yet, unless they've been scheduled more than a week
+ # ago (it probably means we've lost them in flight somewhere).
+ where_clauses.append(
+ """origin_visit_stats.last_scheduled IS NULL
+ OR origin_visit_stats.last_scheduled < GREATEST(
+ %s - %s,
+ origin_visit_stats.last_eventful,
+ origin_visit_stats.last_uneventful,
+ origin_visit_stats.last_failed,
+ origin_visit_stats.last_notfound
+ )
+ """
)
- """
- )
- query_args.append(timestamp)
+ query_args.append(timestamp)
+ query_args.append(scheduled_cooldown)
if policy == "oldest_scheduled_first":
order_by = "origin_visit_stats.last_scheduled NULLS FIRST"
diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py
--- a/swh/scheduler/interface.py
+++ b/swh/scheduler/interface.py
@@ -389,6 +389,7 @@
count: int,
policy: str,
timestamp: Optional[datetime.datetime] = None,
+ scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
) -> List[ListedOrigin]:
"""Get at most the `count` next origins that need to be visited with
the `visit_type` loader according to the given scheduling `policy`.
@@ -402,6 +403,8 @@
policy: the scheduling policy used to select which visits to schedule
timestamp: the mocked timestamp at which we're recording that the visits are
being scheduled (defaults to the current time)
+ scheduled_cooldown: the minimal interval before which we can schedule
+ the same origin again
"""
...
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -751,11 +751,20 @@
return visit_type, recorded_origins
- def _check_grab_next_visit(self, swh_scheduler, visit_type, policy, expected):
- """Calls grab_next_visits with the passed policy, and check that all
- the origins returned are the expected ones (in the same order), and
- that no extra origins are returned. Also checks the origin visits have
- been marked as scheduled, and are only re-scheduled a week later"""
+ def _check_grab_next_visit_basic(
+ self, swh_scheduler, visit_type, policy, expected, **kwargs
+ ):
+ """Calls grab_next_visits with the passed policy, and check that:
+
+ - all the origins returned are the expected ones (in the same order)
+ - no extra origins are returned
+ - the last_scheduled field has been set properly.
+
+ Pass the extra keyword arguments to the calls to grab_next_visits.
+
+ Returns a timestamp greater than all `last_scheduled` values for the grabbed
+ visits.
+ """
assert len(expected) != 0
before = utcnow()
@@ -764,6 +773,7 @@
# Request one more than expected to check that no extra origin is returned
count=len(expected) + 1,
policy=policy,
+ **kwargs,
)
after = utcnow()
@@ -778,11 +788,24 @@
# They should not be scheduled again
ret = swh_scheduler.grab_next_visits(
- visit_type=visit_type, count=len(expected) + 1, policy=policy,
+ visit_type=visit_type, count=len(expected) + 1, policy=policy, **kwargs
)
assert ret == [], "grab_next_visits returned already-scheduled origins"
- # But a week later, they should
+ return after
+
+ def _check_grab_next_visit(
+ self, swh_scheduler, visit_type, policy, expected, **kwargs
+ ):
+ """Run the same check as _check_grab_next_visit_basic, but also checks the
+ origin visits have been marked as scheduled, and are only re-scheduled a
+ week later
+ """
+
+ after = self._check_grab_next_visit_basic(
+ swh_scheduler, visit_type, policy, expected, **kwargs
+ )
+ # But a week, later, they should
ret = swh_scheduler.grab_next_visits(
visit_type=visit_type,
count=len(expected) + 1,
@@ -795,8 +818,8 @@
expected
), "grab_next_visits didn't reschedule visits after a week"
- def test_grab_next_visits_oldest_scheduled_first(
- self, swh_scheduler, listed_origins_by_type,
+ def _prepare_oldest_scheduled_first_origins(
+ self, swh_scheduler, listed_origins_by_type
):
visit_type, origins = self._grab_next_visits_setup(
swh_scheduler, listed_origins_by_type
@@ -823,6 +846,14 @@
# as well as those with the oldest values (i.e. the last ones), in order.
expected = [origins[0]] + origins[1:][::-1]
+ return visit_type, origins, expected
+
+ def test_grab_next_visits_oldest_scheduled_first(
+ self, swh_scheduler, listed_origins_by_type,
+ ):
+ visit_type, origins, expected = self._prepare_oldest_scheduled_first_origins(
+ swh_scheduler, listed_origins_by_type
+ )
self._check_grab_next_visit(
swh_scheduler,
visit_type=visit_type,
@@ -830,6 +861,44 @@
expected=expected,
)
+ @pytest.mark.parametrize("cooldown", (7, 15))
+ def test_grab_next_visits_oldest_scheduled_first_scheduled_cooldown(
+ self, swh_scheduler, listed_origins_by_type, cooldown
+ ):
+ visit_type, origins, expected = self._prepare_oldest_scheduled_first_origins(
+ swh_scheduler, listed_origins_by_type
+ )
+ after = self._check_grab_next_visit_basic(
+ swh_scheduler,
+ visit_type=visit_type,
+ policy="oldest_scheduled_first",
+ expected=expected,
+ )
+
+ cooldown_td = datetime.timedelta(days=cooldown)
+
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type,
+ count=len(expected) + 1,
+ policy="oldest_scheduled_first",
+ timestamp=after + cooldown_td - datetime.timedelta(seconds=1),
+ scheduled_cooldown=cooldown_td,
+ )
+
+ assert ret == [], "Scheduled cooldown ignored"
+
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type,
+ count=len(expected) + 1,
+ policy="oldest_scheduled_first",
+ timestamp=after + cooldown_td + datetime.timedelta(seconds=1),
+ scheduled_cooldown=cooldown_td,
+ )
+
+ assert sorted(ret) == sorted(
+ expected
+ ), "grab_next_visits didn't reschedule visits after the configured cooldown"
+
def test_grab_next_visits_never_visited_oldest_update_first(
self, swh_scheduler, listed_origins_by_type,
):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 10:51 AM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234236
Attached To
D5901: Make the origin visit scheduling cooldown configurable
Event Timeline
Log In to Comment