diff --git a/swh/scheduler/celery_backend/recurrent_visits.py b/swh/scheduler/celery_backend/recurrent_visits.py
--- a/swh/scheduler/celery_backend/recurrent_visits.py
+++ b/swh/scheduler/celery_backend/recurrent_visits.py
@@ -49,6 +49,16 @@
     "cvs": _VCS_POLICY_WEIGHTS,
     "bzr": _VCS_POLICY_WEIGHTS,
 }
 """Scheduling policies to use to retrieve visits for the given visit types,
 with their relative weights"""
+
+POLICY_ADDITIONAL_PARAMETERS: Dict[str, Dict[str, Any]] = {
+    "git": {
+        "already_visited_order_by_lag": {"tablesample": 0.1},
+        "never_visited_oldest_update_first": {"tablesample": 0.1},
+        "origins_without_last_update": {"tablesample": 0.1},
+    }
+}
+"""Additional keyword arguments passed to ``grab_next_visits`` for the given
+visit types and scheduling policies"""
 
@@ -103,7 +113,10 @@
     for policy, ratio in policy_ratio.items():
         num_tasks_to_send = int(num_visits * ratio)
         fetched_origins[policy] = scheduler.grab_next_visits(
-            visit_type, num_tasks_to_send, policy=policy
+            visit_type,
+            num_tasks_to_send,
+            policy=policy,
+            **POLICY_ADDITIONAL_PARAMETERS.get(visit_type, {}).get(policy, {}),
         )
 
     all_origins: List[ListedOrigin] = list(
diff --git a/swh/scheduler/tests/test_recurrent_visits.py b/swh/scheduler/tests/test_recurrent_visits.py
--- a/swh/scheduler/tests/test_recurrent_visits.py
+++ b/swh/scheduler/tests/test_recurrent_visits.py
@@ -11,7 +11,9 @@
 import pytest
 
 from swh.scheduler.celery_backend.recurrent_visits import (
+    POLICY_ADDITIONAL_PARAMETERS,
     VisitSchedulerThreads,
+    grab_next_visits_policy_weights,
     send_visits_for_visit_type,
     spawn_visit_scheduler_thread,
     terminate_visit_scheduler_threads,
@@ -132,6 +134,29 @@
     assert expected_record in set(records)
 
 
+@pytest.mark.parametrize(
+    "visit_type, tablesamples",
+    [("hg", {}), ("git", POLICY_ADDITIONAL_PARAMETERS["git"])],
+)
+def test_recurrent_visit_additional_parameters(
+    swh_scheduler, mocker, visit_type, tablesamples
+):
+    """Check that per-policy additional parameters reach ``grab_next_visits``"""
+
+    mock_grab_next_visits = mocker.patch.object(swh_scheduler, "grab_next_visits")
+    mock_grab_next_visits.return_value = []
+
+    grab_next_visits_policy_weights(swh_scheduler, visit_type, 10)
+
+    # Guard against a vacuous pass: the loop below checks nothing if the
+    # scheduler was never queried.
+    assert mock_grab_next_visits.call_args_list
+
+    for call in mock_grab_next_visits.call_args_list:
+        expected = tablesamples.get(call[1]["policy"], {}).get("tablesample")
+        assert call[1].get("tablesample") == expected
+
+
 @pytest.fixture
 def scheduler_config(swh_scheduler_config):
     return {"scheduler": {"cls": "local", **swh_scheduler_config}, "celery": {}}