Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124921
D4895.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D4895.diff
View Options
diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py
--- a/swh/scheduler/journal_client.py
+++ b/swh/scheduler/journal_client.py
@@ -62,6 +62,7 @@
"last_failed": None,
"last_notfound": None,
"last_snapshot": None,
+ "successive_visits": 0,
}
pk = origin, visit_type
if pk not in origin_visit_stats:
@@ -72,14 +73,33 @@
visit_stats_d = origin_visit_stats[pk]
+ # get the name of the most recent event we got
+ date_keys = [
+ f"last_{k}" for k in ("eventful", "uneventful", "failed", "notfound")
+ ]
+ event_dates = dict(
+ (v, k) for k, v in visit_stats_d.items() if k in date_keys and v is not None
+ )
+ if not event_dates:
+ maxdate = None
+ last_event = None
+ else:
+ maxdate = max_date(*event_dates.keys())
+ last_event = event_dates[maxdate]
+ increment_successive_visits = False
if msg_dict["status"] == "not_found":
visit_stats_d["last_notfound"] = max_date(
msg_dict["date"], visit_stats_d.get("last_notfound")
)
+ if last_event == "last_notfound":
+ increment_successive_visits = True
+
elif msg_dict["snapshot"] is None:
visit_stats_d["last_failed"] = max_date(
msg_dict["date"], visit_stats_d.get("last_failed")
)
+ if last_event == "last_failed":
+ increment_successive_visits = True
else: # visit with snapshot, something happened
if visit_stats_d["last_snapshot"] is None:
# first time visit with snapshot, we keep relevant information
@@ -104,6 +124,9 @@
# new eventful visit (new snapshot)
visit_stats_d["last_eventful"] = current_status_date
visit_stats_d["last_snapshot"] = msg_dict["snapshot"]
+ if last_event == "last_eventful":
+ increment_successive_visits = True
+
else:
# same snapshot as before
if (
@@ -118,9 +141,18 @@
"last_eventful"
]
visit_stats_d["last_eventful"] = current_status_date
+ # there is no way we can do anything but reset the
+ # successive_visits here...
else:
# uneventful event
visit_stats_d["last_uneventful"] = current_status_date
+ if last_event == "last_uneventful":
+ increment_successive_visits = True
+
+ if increment_successive_visits:
+ visit_stats_d["successive_visits"] += 1
+ else:
+ visit_stats_d["successive_visits"] = 1
scheduler.origin_visit_stats_upsert(
OriginVisitStats(**ovs) for ovs in origin_visit_stats.values()
diff --git a/swh/scheduler/model.py b/swh/scheduler/model.py
--- a/swh/scheduler/model.py
+++ b/swh/scheduler/model.py
@@ -235,6 +235,7 @@
last_snapshot = attr.ib(
type=Optional[bytes], validator=type_validator(), default=None
)
+ successive_visits = attr.ib(type=int, validator=type_validator(), default=0)
@last_eventful.validator
def check_last_eventful(self, attribute, value):
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -172,6 +172,7 @@
last_scheduled timestamptz,
-- last snapshot resulting from an eventful visit
last_snapshot bytea,
+ successive_visits int default 0,
primary key (url, visit_type)
);
@@ -184,6 +185,7 @@
comment on column origin_visit_stats.last_notfound is 'Date of the last notfound event';
comment on column origin_visit_stats.last_scheduled is 'Time when this origin was scheduled to be visited last';
comment on column origin_visit_stats.last_snapshot is 'sha1_git of the last visit snapshot';
+comment on column origin_visit_stats.successive_visits is 'number of successive visits with the same status';
create table scheduler_metrics (
diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py
--- a/swh/scheduler/tests/test_journal_client.py
+++ b/swh/scheduler/tests/test_journal_client.py
@@ -120,6 +120,7 @@
last_failed=None,
last_notfound=visit_status["date"],
last_snapshot=None,
+ successive_visits=1,
)
visit_statuses = [
@@ -155,6 +156,7 @@
last_failed=None,
last_notfound=DATE3,
last_snapshot=None,
+ successive_visits=1,
)
@@ -208,6 +210,7 @@
last_failed=DATE3,
last_notfound=None,
last_snapshot=None,
+ successive_visits=3,
)
@@ -261,6 +264,7 @@
last_failed=None,
last_notfound=None,
last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"),
+ successive_visits=3,
)
@@ -285,6 +289,7 @@
last_failed=DATE2,
last_notfound=DATE1,
last_snapshot=visit_status["snapshot"],
+ successive_visits=1,
)
]
)
@@ -305,6 +310,7 @@
last_failed=DATE2,
last_notfound=DATE1,
last_snapshot=visit_status["snapshot"],
+ successive_visits=1,
)
@@ -364,6 +370,7 @@
last_failed=None,
last_notfound=None,
last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
+ successive_visits=1,
)
assert swh_scheduler.origin_visit_stats_get("foo", "git") == expected_visit_stats
@@ -425,6 +432,7 @@
last_failed=None,
last_notfound=None,
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
+ successive_visits=1,
)
assert (
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 11:18 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216494
Attached To
D4895: Add a successive_visits counter to OriginVisitStats
Event Timeline
Log In to Comment