Page MenuHomeSoftware Heritage

D4897.diff
No OneTemporary

D4897.diff

diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -755,50 +755,98 @@
assert ret.next_page_token is None
assert len(ret.origins) == len(listed_origins)
- @pytest.mark.parametrize("policy", ["oldest_scheduled_first"])
- def test_grab_next_visits(self, swh_scheduler, listed_origins_by_type, policy):
- NUM_RESULTS = 5
- # Strict inequality to check that grab_next_visits doesn't return more
- # results than requested
+ def _grab_next_visits_setup(self, swh_scheduler, listed_origins_by_type):
+ """Basic origins setup for scheduling policy tests"""
visit_type = next(iter(listed_origins_by_type))
- assert len(listed_origins_by_type[visit_type]) > NUM_RESULTS
+ origins = listed_origins_by_type[visit_type][:100]
+ assert len(origins) > 0
+
+ recorded_origins = swh_scheduler.record_listed_origins(origins)
- for origins in listed_origins_by_type.values():
- swh_scheduler.record_listed_origins(origins)
+ return visit_type, recorded_origins
+
+ def _check_grab_next_visit(self, swh_scheduler, visit_type, policy, expected):
+ """Calls grab_next_visits with the passed policy, and check that all
+ the origins returned are the expected ones (in the same order), and
+ that no extra origins are returned. Also checks the origin visits have
+ been marked as scheduled."""
+ assert len(expected) != 0
before = utcnow()
- ret = swh_scheduler.grab_next_visits(visit_type, NUM_RESULTS, policy=policy)
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type,
+ # Request one more than expected to check that no extra origin is returned
+ count=len(expected) + 1,
+ policy=policy,
+ )
after = utcnow()
- assert len(ret) == NUM_RESULTS
- for origin in ret:
- pk = (origin.url, origin.visit_type)
- visit_stats = swh_scheduler.origin_visit_stats_get([pk])[0]
- assert visit_stats is not None
+ assert ret == expected
+ visit_stats_list = swh_scheduler.origin_visit_stats_get(
+ [(origin.url, origin.visit_type) for origin in expected]
+ )
+ assert len(visit_stats_list) == len(expected)
+ for visit_stats in visit_stats_list:
+ # Check that last_scheduled got updated
assert before <= visit_stats.last_scheduled <= after
- @pytest.mark.parametrize("policy", ["oldest_scheduled_first"])
- def test_grab_next_visits_underflow(
- self, swh_scheduler, listed_origins_by_type, policy
+ def test_grab_next_visits_oldest_scheduled_first(
+ self, swh_scheduler, listed_origins_by_type,
):
- NUM_RESULTS = 5
- visit_type = next(iter(listed_origins_by_type))
- assert len(listed_origins_by_type[visit_type]) > NUM_RESULTS
+ visit_type, origins = self._grab_next_visits_setup(
+ swh_scheduler, listed_origins_by_type
+ )
- swh_scheduler.record_listed_origins(
- listed_origins_by_type[visit_type][:NUM_RESULTS]
+ # Give all origins but one a last_scheduled date
+ base_date = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
+ visit_stats = [
+ OriginVisitStats(
+ url=origin.url,
+ visit_type=origin.visit_type,
+ last_snapshot=None,
+ last_eventful=None,
+ last_uneventful=None,
+ last_failed=None,
+ last_notfound=None,
+ last_scheduled=base_date - datetime.timedelta(seconds=i),
+ )
+ for i, origin in enumerate(origins[1:])
+ ]
+ swh_scheduler.origin_visit_stats_upsert(visit_stats)
+
+ # We expect to retrieve the origin with a NULL last_scheduled
+ # as well as those with the oldest values (i.e. the last ones), in order.
+ expected = [origins[0]] + origins[1:][::-1]
+
+ self._check_grab_next_visit(
+ swh_scheduler,
+ visit_type=visit_type,
+ policy="oldest_scheduled_first",
+ expected=expected,
)
- ret = swh_scheduler.grab_next_visits(visit_type, NUM_RESULTS + 2, policy=policy)
+ def test_grab_next_visits_underflow(self, swh_scheduler, listed_origins_by_type):
+ """Check that grab_next_visits works when there not enough origins in
+ the database"""
+ visit_type = next(iter(listed_origins_by_type))
+
+ # Only add 5 origins to the database
+ origins = listed_origins_by_type[visit_type][:5]
+ assert origins
+
+ swh_scheduler.record_listed_origins(origins)
- assert len(ret) == NUM_RESULTS
+ ret = swh_scheduler.grab_next_visits(
+ visit_type, len(origins) + 2, policy="oldest_scheduled_first"
+ )
+
+ assert len(ret) == 5
def test_grab_next_visits_unknown_policy(self, swh_scheduler):
+ unknown_policy = "non_existing_policy"
NUM_RESULTS = 5
- with pytest.raises(UnknownPolicy, match="non_existing_policy"):
- swh_scheduler.grab_next_visits(
- "type", NUM_RESULTS, policy="non_existing_policy"
- )
+ with pytest.raises(UnknownPolicy, match=unknown_policy):
+ swh_scheduler.grab_next_visits("type", NUM_RESULTS, policy=unknown_policy)
def _create_task_types(self, scheduler):
for tt in TASK_TYPES.values():

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 8:50 AM (22 h, 54 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221196

Event Timeline