diff --git a/swh/scheduler/celery_backend/recurrent_visits.py b/swh/scheduler/celery_backend/recurrent_visits.py
--- a/swh/scheduler/celery_backend/recurrent_visits.py
+++ b/swh/scheduler/celery_backend/recurrent_visits.py
@@ -49,6 +49,21 @@
     "cvs": _VCS_POLICY_WEIGHTS,
     "bzr": _VCS_POLICY_WEIGHTS,
 }
 """Scheduling policies to use to retrieve visits for the given visit types, with their
 relative weights"""
+
+# Value of the ``tablesample`` keyword argument of ``grab_next_visits`` to use for
+# each scheduling policy when scheduling "git" visits.
+_GIT_TABLESAMPLE_VALUES = {
+    "already_visited_order_by_lag": 0.1,
+    "never_visited_oldest_update_first": 0.1,
+    "origins_without_last_update": 0.1,
+}
+
+POLICY_ADDITIONAL_PARAMETERS: Dict[str, Dict[str, Any]] = {
+    "default": {},
+    "git": {"tablesample": _GIT_TABLESAMPLE_VALUES},
+}
+"""Additional keyword arguments to pass to ``grab_next_visits`` for the given visit
+types: each argument name maps to its value per scheduling policy"""
 
@@ -100,10 +115,19 @@
 
     fetched_origins: Dict[str, List[ListedOrigin]] = {}
 
+    # Extra keyword arguments for grab_next_visits, keyed by argument name and then
+    # by policy; visit types without an entry fall back to "default" (no extras).
+    additional_policy_parameters = POLICY_ADDITIONAL_PARAMETERS.get(
+        visit_type, POLICY_ADDITIONAL_PARAMETERS["default"]
+    )
+
     for policy, ratio in policy_ratio.items():
         num_tasks_to_send = int(num_visits * ratio)
         fetched_origins[policy] = scheduler.grab_next_visits(
-            visit_type, num_tasks_to_send, policy=policy
+            visit_type,
+            num_tasks_to_send,
+            policy=policy,
+            **{k: v.get(policy) for k, v in additional_policy_parameters.items()},
         )
 
     all_origins: List[ListedOrigin] = list(
diff --git a/swh/scheduler/tests/test_recurrent_visits.py b/swh/scheduler/tests/test_recurrent_visits.py
--- a/swh/scheduler/tests/test_recurrent_visits.py
+++ b/swh/scheduler/tests/test_recurrent_visits.py
@@ -11,7 +11,9 @@
 import pytest
 
 from swh.scheduler.celery_backend.recurrent_visits import (
+    _GIT_TABLESAMPLE_VALUES,
     VisitSchedulerThreads,
+    grab_next_visits_policy_weights,
     send_visits_for_visit_type,
     spawn_visit_scheduler_thread,
     terminate_visit_scheduler_threads,
@@ -132,6 +134,23 @@
         assert expected_record in set(records)
 
 
+@pytest.mark.parametrize(
+    "visit_type, tablesamples", [("hg", {}), ("git", _GIT_TABLESAMPLE_VALUES)]
+)
+def test_recurrent_visit_additional_parameters(
+    swh_scheduler, mocker, visit_type, tablesamples
+):
+    """Testing additional policy parameters"""
+
+    mock_grab_next_visits = mocker.patch.object(swh_scheduler, "grab_next_visits")
+    mock_grab_next_visits.return_value = []
+
+    grab_next_visits_policy_weights(swh_scheduler, visit_type, 10)
+
+    for call in mock_grab_next_visits.call_args_list:
+        assert call.kwargs.get("tablesample") == tablesamples.get(call.kwargs["policy"])
+
+
 @pytest.fixture
 def scheduler_config(swh_scheduler_config):
     return {"scheduler": {"cls": "local", **swh_scheduler_config}, "celery": {}}