diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -928,16 +928,17 @@ pk_cols = OriginVisitStats.primary_key_columns() insert_cols, insert_meta = OriginVisitStats.insert_columns_and_metavars() + upsert_cols = [col for col in insert_cols if col not in pk_cols] + upsert_set = ", ".join( + f"{col} = coalesce(EXCLUDED.{col}, ovi.{col})" for col in upsert_cols + ) + query = f""" INSERT into origin_visit_stats AS ovi ({", ".join(insert_cols)}) VALUES %s ON CONFLICT ({", ".join(pk_cols)}) DO UPDATE - SET last_eventful = coalesce(excluded.last_eventful, ovi.last_eventful), - last_uneventful = coalesce(excluded.last_uneventful, ovi.last_uneventful), - last_failed = coalesce(excluded.last_failed, ovi.last_failed), - last_notfound = coalesce(excluded.last_notfound, ovi.last_notfound), - last_snapshot = coalesce(excluded.last_snapshot, ovi.last_snapshot) - """ # noqa + SET {upsert_set} + """ try: psycopg2.extras.execute_values( diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py --- a/swh/scheduler/journal_client.py +++ b/swh/scheduler/journal_client.py @@ -56,22 +56,23 @@ list(set((vs["origin"], vs["type"]) for vs in interesting_messages)) ) } + # Use the default values from the model object + empty_object = { + field.name: field.default if field.default != attr.NOTHING else None + for field in attr.fields(OriginVisitStats) + } for msg_dict in interesting_messages: origin = msg_dict["origin"] visit_type = msg_dict["type"] - empty_object = { - "url": origin, - "visit_type": visit_type, - "last_uneventful": None, - "last_eventful": None, - "last_failed": None, - "last_notfound": None, - "last_snapshot": None, - } pk = origin, visit_type if pk not in origin_visit_stats: - origin_visit_stats[pk] = empty_object + origin_visit_stats[pk] = { + **empty_object, + "url": origin, + "visit_type": visit_type, + } + visit_stats_d = origin_visit_stats[pk] if msg_dict["status"] == "not_found":