Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/journal_client.py
Show All 37 Lines | ) -> None: | ||||
currification of `scheduler` and `task_names`. | currification of `scheduler` and `task_names`. | ||||
""" | """ | ||||
assert set(messages) <= { | assert set(messages) <= { | ||||
msg_type | msg_type | ||||
}, f"Got unexpected {', '.join(set(messages) - set([msg_type]))} message types" | }, f"Got unexpected {', '.join(set(messages) - set([msg_type]))} message types" | ||||
assert msg_type in messages, f"Expected {msg_type} messages" | assert msg_type in messages, f"Expected {msg_type} messages" | ||||
origin_visit_stats: Dict[Tuple[str, str], Dict] = {} | interesting_messages = [ | ||||
for msg_dict in messages[msg_type]: | msg for msg in messages[msg_type] if msg["status"] not in ("created", "ongoing") | ||||
if msg_dict["status"] in ("created", "ongoing"): | ] | ||||
continue | |||||
origin_visit_stats: Dict[Tuple[str, str], Dict] = { | |||||
(visit_stats.url, visit_stats.visit_type): attr.asdict(visit_stats) | |||||
for visit_stats in scheduler.origin_visit_stats_get( | |||||
olasd: Maybe make this a `list(set(...))`? It shouldn't change much in practice. | |||||
list(set((vs["origin"], vs["type"]) for vs in interesting_messages)) | |||||
) | |||||
} | |||||
for msg_dict in interesting_messages: | |||||
origin = msg_dict["origin"] | origin = msg_dict["origin"] | ||||
visit_type = msg_dict["type"] | visit_type = msg_dict["type"] | ||||
empty_object = { | empty_object = { | ||||
"url": origin, | "url": origin, | ||||
"visit_type": visit_type, | "visit_type": visit_type, | ||||
"last_uneventful": None, | "last_uneventful": None, | ||||
"last_eventful": None, | "last_eventful": None, | ||||
"last_failed": None, | "last_failed": None, | ||||
"last_notfound": None, | "last_notfound": None, | ||||
"last_snapshot": None, | "last_snapshot": None, | ||||
} | } | ||||
pk = origin, visit_type | pk = origin, visit_type | ||||
if pk not in origin_visit_stats: | if pk not in origin_visit_stats: | ||||
visit_stats = scheduler.origin_visit_stats_get([pk]) | origin_visit_stats[pk] = empty_object | ||||
origin_visit_stats[pk] = ( | |||||
attr.asdict(visit_stats[0]) if visit_stats else empty_object | |||||
) | |||||
visit_stats_d = origin_visit_stats[pk] | visit_stats_d = origin_visit_stats[pk] | ||||
if msg_dict["status"] == "not_found": | if msg_dict["status"] == "not_found": | ||||
visit_stats_d["last_notfound"] = max_date( | visit_stats_d["last_notfound"] = max_date( | ||||
msg_dict["date"], visit_stats_d.get("last_notfound") | msg_dict["date"], visit_stats_d.get("last_notfound") | ||||
) | ) | ||||
elif msg_dict["snapshot"] is None: | elif msg_dict["snapshot"] is None: | ||||
visit_stats_d["last_failed"] = max_date( | visit_stats_d["last_failed"] = max_date( | ||||
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines |
Maybe make this a `list(set(...))`? It shouldn't change much in practice.