Page MenuHomeSoftware Heritage

D6876.id24956.diff
No OneTemporary

D6876.id24956.diff

diff --git a/swh/scheduler/celery_backend/recurrent_visits.py b/swh/scheduler/celery_backend/recurrent_visits.py
--- a/swh/scheduler/celery_backend/recurrent_visits.py
+++ b/swh/scheduler/celery_backend/recurrent_visits.py
@@ -49,6 +49,15 @@
"cvs": _VCS_POLICY_WEIGHTS,
"bzr": _VCS_POLICY_WEIGHTS,
}
+
+POLICY_ADDITIONAL_PARAMETERS: Dict[str, Dict[str, Any]] = {
+ "git": {
+ "already_visited_order_by_lag": {"tablesample": 0.1},
+ "never_visited_oldest_update_first": {"tablesample": 0.1},
+ "origins_without_last_update": {"tablesample": 0.1},
+ }
+}
+
"""Scheduling policies to use to retrieve visits for the given visit types, with their
relative weights"""
@@ -103,7 +112,10 @@
for policy, ratio in policy_ratio.items():
num_tasks_to_send = int(num_visits * ratio)
fetched_origins[policy] = scheduler.grab_next_visits(
- visit_type, num_tasks_to_send, policy=policy
+ visit_type,
+ num_tasks_to_send,
+ policy=policy,
+ **POLICY_ADDITIONAL_PARAMETERS.get(visit_type, {}).get(policy, {}),
)
all_origins: List[ListedOrigin] = list(
diff --git a/swh/scheduler/tests/test_recurrent_visits.py b/swh/scheduler/tests/test_recurrent_visits.py
--- a/swh/scheduler/tests/test_recurrent_visits.py
+++ b/swh/scheduler/tests/test_recurrent_visits.py
@@ -11,7 +11,9 @@
import pytest
from swh.scheduler.celery_backend.recurrent_visits import (
+ POLICY_ADDITIONAL_PARAMETERS,
VisitSchedulerThreads,
+ grab_next_visits_policy_weights,
send_visits_for_visit_type,
spawn_visit_scheduler_thread,
terminate_visit_scheduler_threads,
@@ -132,6 +134,26 @@
assert expected_record in set(records)
+@pytest.mark.parametrize(
+ "visit_type, tablesamples",
+ [("hg", {}), ("git", POLICY_ADDITIONAL_PARAMETERS["git"])],
+)
+def test_recurrent_visit_additional_parameters(
+ swh_scheduler, mocker, visit_type, tablesamples
+):
+ """Check per-policy additional parameters are passed to grab_next_visits"""
+
+ mock_grab_next_visits = mocker.patch.object(swh_scheduler, "grab_next_visits")
+ mock_grab_next_visits.return_value = []
+
+ grab_next_visits_policy_weights(swh_scheduler, visit_type, 10)
+
+ for call in mock_grab_next_visits.call_args_list:
+ assert call[1].get("tablesample") == tablesamples.get(
+ call[1]["policy"], {}
+ ).get("tablesample")
+
+
@pytest.fixture
def scheduler_config(swh_scheduler_config):
return {"scheduler": {"cls": "local", **swh_scheduler_config}, "celery": {}}

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 11:24 AM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219138

Event Timeline