Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9313110
D6876.id24956.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
D6876.id24956.diff
View Options
diff --git a/swh/scheduler/celery_backend/recurrent_visits.py b/swh/scheduler/celery_backend/recurrent_visits.py
--- a/swh/scheduler/celery_backend/recurrent_visits.py
+++ b/swh/scheduler/celery_backend/recurrent_visits.py
@@ -49,6 +49,15 @@
"cvs": _VCS_POLICY_WEIGHTS,
"bzr": _VCS_POLICY_WEIGHTS,
}
+
+POLICY_ADDITIONAL_PARAMETERS: Dict[str, Dict[str, Any]] = {
+ "git": {
+ "already_visited_order_by_lag": {"tablesample": 0.1},
+ "never_visited_oldest_update_first": {"tablesample": 0.1},
+ "origins_without_last_update": {"tablesample": 0.1},
+ }
+}
+
"""Scheduling policies to use to retrieve visits for the given visit types, with their
relative weights"""
@@ -103,7 +112,10 @@
for policy, ratio in policy_ratio.items():
num_tasks_to_send = int(num_visits * ratio)
fetched_origins[policy] = scheduler.grab_next_visits(
- visit_type, num_tasks_to_send, policy=policy
+ visit_type,
+ num_tasks_to_send,
+ policy=policy,
+ **POLICY_ADDITIONAL_PARAMETERS.get(visit_type, {}).get(policy, {}),
)
all_origins: List[ListedOrigin] = list(
diff --git a/swh/scheduler/tests/test_recurrent_visits.py b/swh/scheduler/tests/test_recurrent_visits.py
--- a/swh/scheduler/tests/test_recurrent_visits.py
+++ b/swh/scheduler/tests/test_recurrent_visits.py
@@ -11,7 +11,9 @@
import pytest
from swh.scheduler.celery_backend.recurrent_visits import (
+ POLICY_ADDITIONAL_PARAMETERS,
VisitSchedulerThreads,
+ grab_next_visits_policy_weights,
send_visits_for_visit_type,
spawn_visit_scheduler_thread,
terminate_visit_scheduler_threads,
@@ -132,6 +134,26 @@
assert expected_record in set(records)
+@pytest.mark.parametrize(
+ "visit_type, tablesamples",
+ [("hg", {}), ("git", POLICY_ADDITIONAL_PARAMETERS["git"])],
+)
+def test_recurrent_visit_additional_parameters(
+ swh_scheduler, mocker, visit_type, tablesamples
+):
+ """Check that configured per-policy tablesample values reach grab_next_visits"""
+
+ mock_grab_next_visits = mocker.patch.object(swh_scheduler, "grab_next_visits")
+ mock_grab_next_visits.return_value = []
+
+ grab_next_visits_policy_weights(swh_scheduler, visit_type, 10)
+
+ for call in mock_grab_next_visits.call_args_list:
+ assert call[1].get("tablesample") == tablesamples.get(
+ call[1]["policy"], {}
+ ).get("tablesample")
+
+
@pytest.fixture
def scheduler_config(swh_scheduler_config):
return {"scheduler": {"cls": "local", **swh_scheduler_config}, "celery": {}}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 11:24 AM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219138
Attached To
D6876: Allow to specify the visit grab parameters per visit type and policy
Event Timeline
Log In to Comment