Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124762
D5980.id21882.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Subscribers
None
D5980.id21882.diff
View Options
diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py
--- a/swh/scheduler/journal_client.py
+++ b/swh/scheduler/journal_client.py
@@ -193,6 +193,8 @@
for field in attr.fields(OriginVisitStats)
}
+ disabled_urls: List[str] = []
+
# Retrieve the global queue state
queue_position_per_visit_type = scheduler.visit_scheduler_queue_position_get()
@@ -261,6 +263,25 @@
else:
visit_stats_d["successive_visits"] = 1
+ # Flag origins for deactivation after 3+ successive failed/not-found visits
+ if (
+ visit_stats_d["last_visit_status"]
+ in [LastVisitStatus.not_found, LastVisitStatus.failed]
+ ) and visit_stats_d["successive_visits"] >= 3:
+ disabled_urls.append(visit_stats_d["url"])
+
scheduler.origin_visit_stats_upsert(
OriginVisitStats(**ovs) for ovs in origin_visit_stats.values()
)
+
+ # Disable the origins flagged above, if any
+ if disabled_urls:
+ disabled_origins = []
+ for url in disabled_urls:
+ origins = scheduler.get_listed_origins(url=url).results
+ if len(origins) > 0:
+ origin = attr.evolve(origins[0], enabled=False)
+ disabled_origins.append(origin)
+
+ if disabled_origins:
+ scheduler.record_listed_origins(disabled_origins)
diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py
--- a/swh/scheduler/tests/test_journal_client.py
+++ b/swh/scheduler/tests/test_journal_client.py
@@ -919,3 +919,76 @@
)
assert mock_random.called
+
+
+def test_disable_failing_origins(swh_scheduler):
+ """Origin with too many failed attempts ends up being deactivated in the scheduler.
+
+ """
+
+ # actually store the origin in the scheduler so we can check it's deactivated in the
+ # end.
+ lister = swh_scheduler.get_or_create_lister(
+ name="something", instance_name="something"
+ )
+ origin = ListedOrigin(
+ url="bar", enabled=True, visit_type="svn", lister_id=lister.id
+ )
+ swh_scheduler.record_listed_origins([origin])
+
+ visit_statuses = [
+ {
+ "origin": "bar",
+ "visit": 2,
+ "status": "failed",
+ "date": DATE1,
+ "type": "svn",
+ "snapshot": None,
+ },
+ {
+ "origin": "bar",
+ "visit": 3,
+ "status": "failed",
+ "date": DATE2,
+ "type": "svn",
+ "snapshot": None,
+ },
+ {
+ "origin": "bar",
+ "visit": 3,
+ "status": "failed",
+ "date": DATE3,
+ "type": "svn",
+ "snapshot": None,
+ },
+ ]
+
+ process_journal_objects(
+ {"origin_visit_status": visit_statuses}, scheduler=swh_scheduler
+ )
+
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([("bar", "svn")])
+ assert_visit_stats_ok(
+ actual_origin_visit_stats[0],
+ OriginVisitStats(
+ url="bar",
+ visit_type="svn",
+ last_successful=None,
+ last_visit=DATE3,
+ last_visit_status=LastVisitStatus.failed,
+ next_position_offset=6,
+ successive_visits=3,
+ ),
+ )
+
+ # Now check that the origin in question is disabled
+ actual_page = swh_scheduler.get_listed_origins(url="bar")
+
+ assert len(actual_page.results) == 1
+ assert actual_page.next_page_token is None
+
+ for origin in actual_page.results:
+ assert origin.enabled is False
+ assert origin.lister_id == lister.id
+ assert origin.url == "bar"
+ assert origin.visit_type == "svn"
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 6:42 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226505
Attached To
D5980: journal_client: Disable origins when too many visit attempts failed
Event Timeline
Log In to Comment