Page MenuHomeSoftware Heritage

D4895.diff
No OneTemporary

D4895.diff

diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py
--- a/swh/scheduler/journal_client.py
+++ b/swh/scheduler/journal_client.py
@@ -62,6 +62,7 @@
"last_failed": None,
"last_notfound": None,
"last_snapshot": None,
+ "successive_visits": 0,
}
pk = origin, visit_type
if pk not in origin_visit_stats:
@@ -72,14 +73,33 @@
visit_stats_d = origin_visit_stats[pk]
+ # get the name of the most recent event we got
+ date_keys = [
+ f"last_{k}" for k in ("eventful", "uneventful", "failed", "notfound")
+ ]
+ event_dates = dict(
+ (v, k) for k, v in visit_stats_d.items() if k in date_keys and v is not None
+ )
+ if not event_dates:
+ maxdate = None
+ last_event = None
+ else:
+ maxdate = max_date(*event_dates.keys())
+ last_event = event_dates[maxdate]
+ increment_successive_visits = False
if msg_dict["status"] == "not_found":
visit_stats_d["last_notfound"] = max_date(
msg_dict["date"], visit_stats_d.get("last_notfound")
)
+ if last_event == "last_notfound":
+ increment_successive_visits = True
+
elif msg_dict["snapshot"] is None:
visit_stats_d["last_failed"] = max_date(
msg_dict["date"], visit_stats_d.get("last_failed")
)
+ if last_event == "last_failed":
+ increment_successive_visits = True
else: # visit with snapshot, something happened
if visit_stats_d["last_snapshot"] is None:
# first time visit with snapshot, we keep relevant information
@@ -104,6 +124,9 @@
# new eventful visit (new snapshot)
visit_stats_d["last_eventful"] = current_status_date
visit_stats_d["last_snapshot"] = msg_dict["snapshot"]
+ if last_event == "last_eventful":
+ increment_successive_visits = True
+
else:
# same snapshot as before
if (
@@ -118,9 +141,18 @@
"last_eventful"
]
visit_stats_d["last_eventful"] = current_status_date
+ # there is no way we can do anything but reset the
+ # successive_visits here...
else:
# uneventful event
visit_stats_d["last_uneventful"] = current_status_date
+ if last_event == "last_uneventful":
+ increment_successive_visits = True
+
+ if increment_successive_visits:
+ visit_stats_d["successive_visits"] += 1
+ else:
+ visit_stats_d["successive_visits"] = 1
scheduler.origin_visit_stats_upsert(
OriginVisitStats(**ovs) for ovs in origin_visit_stats.values()
diff --git a/swh/scheduler/model.py b/swh/scheduler/model.py
--- a/swh/scheduler/model.py
+++ b/swh/scheduler/model.py
@@ -235,6 +235,7 @@
last_snapshot = attr.ib(
type=Optional[bytes], validator=type_validator(), default=None
)
+ successive_visits = attr.ib(type=int, validator=type_validator(), default=0)
@last_eventful.validator
def check_last_eventful(self, attribute, value):
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -172,6 +172,7 @@
last_scheduled timestamptz,
-- last snapshot resulting from an eventful visit
last_snapshot bytea,
+ successive_visits int default 0,
primary key (url, visit_type)
);
@@ -184,6 +185,7 @@
comment on column origin_visit_stats.last_notfound is 'Date of the last notfound event';
comment on column origin_visit_stats.last_scheduled is 'Time when this origin was scheduled to be visited last';
comment on column origin_visit_stats.last_snapshot is 'sha1_git of the last visit snapshot';
+comment on column origin_visit_stats.successive_visits is 'number of successive visits with the same status';
create table scheduler_metrics (
diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py
--- a/swh/scheduler/tests/test_journal_client.py
+++ b/swh/scheduler/tests/test_journal_client.py
@@ -120,6 +120,7 @@
last_failed=None,
last_notfound=visit_status["date"],
last_snapshot=None,
+ successive_visits=1,
)
visit_statuses = [
@@ -155,6 +156,7 @@
last_failed=None,
last_notfound=DATE3,
last_snapshot=None,
+ successive_visits=1,
)
@@ -208,6 +210,7 @@
last_failed=DATE3,
last_notfound=None,
last_snapshot=None,
+ successive_visits=3,
)
@@ -261,6 +264,7 @@
last_failed=None,
last_notfound=None,
last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"),
+ successive_visits=3,
)
@@ -285,6 +289,7 @@
last_failed=DATE2,
last_notfound=DATE1,
last_snapshot=visit_status["snapshot"],
+ successive_visits=1,
)
]
)
@@ -305,6 +310,7 @@
last_failed=DATE2,
last_notfound=DATE1,
last_snapshot=visit_status["snapshot"],
+ successive_visits=1,
)
@@ -364,6 +370,7 @@
last_failed=None,
last_notfound=None,
last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
+ successive_visits=1,
)
assert swh_scheduler.origin_visit_stats_get("foo", "git") == expected_visit_stats
@@ -425,6 +432,7 @@
last_failed=None,
last_notfound=None,
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
+ successive_visits=1,
)
assert (

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 11:18 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216494

Event Timeline