Page MenuHomeSoftware Heritage

D5901.id21179.diff
No OneTemporary

D5901.id21179.diff

diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -316,6 +316,7 @@
count: int,
policy: str,
timestamp: Optional[datetime.datetime] = None,
+ scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
db=None,
cur=None,
) -> List[ListedOrigin]:
@@ -335,21 +336,23 @@
where_clauses.append("visit_type = %s")
query_args.append(visit_type)
- # Don't re-schedule visits if they're already scheduled but we haven't
- # recorded a result yet, unless they've been scheduled more than a week
- # ago (it probably means we've lost them in flight somewhere).
- where_clauses.append(
- """origin_visit_stats.last_scheduled IS NULL
- OR origin_visit_stats.last_scheduled < GREATEST(
- %s - '7 day'::interval,
- origin_visit_stats.last_eventful,
- origin_visit_stats.last_uneventful,
- origin_visit_stats.last_failed,
- origin_visit_stats.last_notfound
+ if scheduled_cooldown:
+ # Don't re-schedule visits if they're already scheduled but we haven't
+ # recorded a result yet, unless they've been scheduled more than a week
+ # ago (it probably means we've lost them in flight somewhere).
+ where_clauses.append(
+ """origin_visit_stats.last_scheduled IS NULL
+ OR origin_visit_stats.last_scheduled < GREATEST(
+ %s - %s,
+ origin_visit_stats.last_eventful,
+ origin_visit_stats.last_uneventful,
+ origin_visit_stats.last_failed,
+ origin_visit_stats.last_notfound
+ )
+ """
)
- """
- )
- query_args.append(timestamp)
+ query_args.append(timestamp)
+ query_args.append(scheduled_cooldown)
if policy == "oldest_scheduled_first":
order_by = "origin_visit_stats.last_scheduled NULLS FIRST"
diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py
--- a/swh/scheduler/interface.py
+++ b/swh/scheduler/interface.py
@@ -389,6 +389,7 @@
count: int,
policy: str,
timestamp: Optional[datetime.datetime] = None,
+ scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
) -> List[ListedOrigin]:
"""Get at most the `count` next origins that need to be visited with
the `visit_type` loader according to the given scheduling `policy`.
@@ -402,6 +403,8 @@
policy: the scheduling policy used to select which visits to schedule
timestamp: the mocked timestamp at which we're recording that the visits are
being scheduled (defaults to the current time)
+ scheduled_cooldown: the minimal interval before which we can schedule
+ the same origin again
"""
...
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -751,11 +751,20 @@
return visit_type, recorded_origins
- def _check_grab_next_visit(self, swh_scheduler, visit_type, policy, expected):
- """Calls grab_next_visits with the passed policy, and check that all
- the origins returned are the expected ones (in the same order), and
- that no extra origins are returned. Also checks the origin visits have
- been marked as scheduled, and are only re-scheduled a week later"""
+ def _check_grab_next_visit_basic(
+ self, swh_scheduler, visit_type, policy, expected, **kwargs
+ ):
+ """Calls grab_next_visits with the passed policy, and check that:
+
+ - all the origins returned are the expected ones (in the same order)
+ - no extra origins are returned
+ - the last_scheduled field has been set properly.
+
+ Pass the extra keyword arguments to the calls to grab_next_visits.
+
+ Returns a timestamp greater than all `last_scheduled` values for the grabbed
+ visits.
+ """
assert len(expected) != 0
before = utcnow()
@@ -764,6 +773,7 @@
# Request one more than expected to check that no extra origin is returned
count=len(expected) + 1,
policy=policy,
+ **kwargs,
)
after = utcnow()
@@ -778,11 +788,24 @@
# They should not be scheduled again
ret = swh_scheduler.grab_next_visits(
- visit_type=visit_type, count=len(expected) + 1, policy=policy,
+ visit_type=visit_type, count=len(expected) + 1, policy=policy, **kwargs
)
assert ret == [], "grab_next_visits returned already-scheduled origins"
- # But a week later, they should
+ return after
+
+ def _check_grab_next_visit(
+ self, swh_scheduler, visit_type, policy, expected, **kwargs
+ ):
+ """Run the same check as _check_grab_next_visit_basic, but also checks the
+ origin visits have been marked as scheduled, and are only re-scheduled a
+ week later
+ """
+
+ after = self._check_grab_next_visit_basic(
+ swh_scheduler, visit_type, policy, expected, **kwargs
+ )
+ # But a week, later, they should
ret = swh_scheduler.grab_next_visits(
visit_type=visit_type,
count=len(expected) + 1,
@@ -795,8 +818,8 @@
expected
), "grab_next_visits didn't reschedule visits after a week"
- def test_grab_next_visits_oldest_scheduled_first(
- self, swh_scheduler, listed_origins_by_type,
+ def _prepare_oldest_scheduled_first_origins(
+ self, swh_scheduler, listed_origins_by_type
):
visit_type, origins = self._grab_next_visits_setup(
swh_scheduler, listed_origins_by_type
@@ -823,6 +846,14 @@
# as well as those with the oldest values (i.e. the last ones), in order.
expected = [origins[0]] + origins[1:][::-1]
+ return visit_type, origins, expected
+
+ def test_grab_next_visits_oldest_scheduled_first(
+ self, swh_scheduler, listed_origins_by_type,
+ ):
+ visit_type, origins, expected = self._prepare_oldest_scheduled_first_origins(
+ swh_scheduler, listed_origins_by_type
+ )
self._check_grab_next_visit(
swh_scheduler,
visit_type=visit_type,
@@ -830,6 +861,44 @@
expected=expected,
)
+ @pytest.mark.parametrize("cooldown", (7, 15))
+ def test_grab_next_visits_oldest_scheduled_first_scheduled_cooldown(
+ self, swh_scheduler, listed_origins_by_type, cooldown
+ ):
+ visit_type, origins, expected = self._prepare_oldest_scheduled_first_origins(
+ swh_scheduler, listed_origins_by_type
+ )
+ after = self._check_grab_next_visit_basic(
+ swh_scheduler,
+ visit_type=visit_type,
+ policy="oldest_scheduled_first",
+ expected=expected,
+ )
+
+ cooldown_td = datetime.timedelta(days=cooldown)
+
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type,
+ count=len(expected) + 1,
+ policy="oldest_scheduled_first",
+ timestamp=after + cooldown_td - datetime.timedelta(seconds=1),
+ scheduled_cooldown=cooldown_td,
+ )
+
+ assert ret == [], "Scheduled cooldown ignored"
+
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type,
+ count=len(expected) + 1,
+ policy="oldest_scheduled_first",
+ timestamp=after + cooldown_td + datetime.timedelta(seconds=1),
+ scheduled_cooldown=cooldown_td,
+ )
+
+ assert sorted(ret) == sorted(
+ expected
+ ), "grab_next_visits didn't reschedule visits after the configured cooldown"
+
def test_grab_next_visits_never_visited_oldest_update_first(
self, swh_scheduler, listed_origins_by_type,
):

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:51 AM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234236

Event Timeline