Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163476
D4897.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D4897.diff
View Options
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -755,50 +755,98 @@
assert ret.next_page_token is None
assert len(ret.origins) == len(listed_origins)
- @pytest.mark.parametrize("policy", ["oldest_scheduled_first"])
- def test_grab_next_visits(self, swh_scheduler, listed_origins_by_type, policy):
- NUM_RESULTS = 5
- # Strict inequality to check that grab_next_visits doesn't return more
- # results than requested
+ def _grab_next_visits_setup(self, swh_scheduler, listed_origins_by_type):
+ """Basic origins setup for scheduling policy tests"""
visit_type = next(iter(listed_origins_by_type))
- assert len(listed_origins_by_type[visit_type]) > NUM_RESULTS
+ origins = listed_origins_by_type[visit_type][:100]
+ assert len(origins) > 0
+
+ recorded_origins = swh_scheduler.record_listed_origins(origins)
- for origins in listed_origins_by_type.values():
- swh_scheduler.record_listed_origins(origins)
+ return visit_type, recorded_origins
+
+ def _check_grab_next_visit(self, swh_scheduler, visit_type, policy, expected):
+ """Calls grab_next_visits with the passed policy, and check that all
+ the origins returned are the expected ones (in the same order), and
+ that no extra origins are returned. Also checks the origin visits have
+ been marked as scheduled."""
+ assert len(expected) != 0
before = utcnow()
- ret = swh_scheduler.grab_next_visits(visit_type, NUM_RESULTS, policy=policy)
+ ret = swh_scheduler.grab_next_visits(
+ visit_type=visit_type,
+ # Request one more than expected to check that no extra origin is returned
+ count=len(expected) + 1,
+ policy=policy,
+ )
after = utcnow()
- assert len(ret) == NUM_RESULTS
- for origin in ret:
- pk = (origin.url, origin.visit_type)
- visit_stats = swh_scheduler.origin_visit_stats_get([pk])[0]
- assert visit_stats is not None
+ assert ret == expected
+ visit_stats_list = swh_scheduler.origin_visit_stats_get(
+ [(origin.url, origin.visit_type) for origin in expected]
+ )
+ assert len(visit_stats_list) == len(expected)
+ for visit_stats in visit_stats_list:
+ # Check that last_scheduled got updated
assert before <= visit_stats.last_scheduled <= after
- @pytest.mark.parametrize("policy", ["oldest_scheduled_first"])
- def test_grab_next_visits_underflow(
- self, swh_scheduler, listed_origins_by_type, policy
+ def test_grab_next_visits_oldest_scheduled_first(
+ self, swh_scheduler, listed_origins_by_type,
):
- NUM_RESULTS = 5
- visit_type = next(iter(listed_origins_by_type))
- assert len(listed_origins_by_type[visit_type]) > NUM_RESULTS
+ visit_type, origins = self._grab_next_visits_setup(
+ swh_scheduler, listed_origins_by_type
+ )
- swh_scheduler.record_listed_origins(
- listed_origins_by_type[visit_type][:NUM_RESULTS]
+ # Give all origins but one a last_scheduled date
+ base_date = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
+ visit_stats = [
+ OriginVisitStats(
+ url=origin.url,
+ visit_type=origin.visit_type,
+ last_snapshot=None,
+ last_eventful=None,
+ last_uneventful=None,
+ last_failed=None,
+ last_notfound=None,
+ last_scheduled=base_date - datetime.timedelta(seconds=i),
+ )
+ for i, origin in enumerate(origins[1:])
+ ]
+ swh_scheduler.origin_visit_stats_upsert(visit_stats)
+
+ # We expect to retrieve the origin with a NULL last_scheduled
+ # as well as those with the oldest values (i.e. the last ones), in order.
+ expected = [origins[0]] + origins[1:][::-1]
+
+ self._check_grab_next_visit(
+ swh_scheduler,
+ visit_type=visit_type,
+ policy="oldest_scheduled_first",
+ expected=expected,
)
- ret = swh_scheduler.grab_next_visits(visit_type, NUM_RESULTS + 2, policy=policy)
+ def test_grab_next_visits_underflow(self, swh_scheduler, listed_origins_by_type):
+ """Check that grab_next_visits works when there not enough origins in
+ the database"""
+ visit_type = next(iter(listed_origins_by_type))
+
+ # Only add 5 origins to the database
+ origins = listed_origins_by_type[visit_type][:5]
+ assert origins
+
+ swh_scheduler.record_listed_origins(origins)
- assert len(ret) == NUM_RESULTS
+ ret = swh_scheduler.grab_next_visits(
+ visit_type, len(origins) + 2, policy="oldest_scheduled_first"
+ )
+
+ assert len(ret) == 5
def test_grab_next_visits_unknown_policy(self, swh_scheduler):
+ unknown_policy = "non_existing_policy"
NUM_RESULTS = 5
- with pytest.raises(UnknownPolicy, match="non_existing_policy"):
- swh_scheduler.grab_next_visits(
- "type", NUM_RESULTS, policy="non_existing_policy"
- )
+ with pytest.raises(UnknownPolicy, match=unknown_policy):
+ swh_scheduler.grab_next_visits("type", NUM_RESULTS, policy=unknown_policy)
def _create_task_types(self, scheduler):
for tt in TASK_TYPES.values():
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 8:50 AM (20 h, 10 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221196
Attached To
D4897: Reorganize grab_next_visits tests to better check sorting behavior
Event Timeline
Log In to Comment