diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py --- a/swh/scheduler/journal_client.py +++ b/swh/scheduler/journal_client.py @@ -122,6 +122,13 @@ "last_eventful" ] visit_stats_d["last_eventful"] = current_status_date + elif ( + latest_recorded_visit_date + and current_status_date == latest_recorded_visit_date + ): + # Same date as the latest recorded visit: this is a duplicated + # message; skip it so last_uneventful is not populated + continue else: # uneventful event visit_stats_d["last_uneventful"] = current_status_date diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py --- a/swh/scheduler/tests/test_journal_client.py +++ b/swh/scheduler/tests/test_journal_client.py @@ -545,3 +545,39 @@ assert ovs.last_snapshot == hash_to_bytes( "5555555555555555555555555555555555555555" ) + + +def test_journal_client_origin_visit_status_duplicated_messages(swh_scheduler): + """A duplicated visit status message must not populate last_uneventful + + """ + visit_status = { + "origin": "foo", + "visit": 1, + "status": "full", + "date": DATE1, + "type": "git", + "snapshot": hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), + } + + process_journal_objects( + {"origin_visit_status": [visit_status]}, scheduler=swh_scheduler + ) + + process_journal_objects( + {"origin_visit_status": [visit_status]}, scheduler=swh_scheduler + ) + + expected_visit_stats = OriginVisitStats( + url="foo", + visit_type="git", + last_eventful=DATE1, + last_uneventful=None, + last_failed=None, + last_notfound=None, + last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), + ) + + assert swh_scheduler.origin_visit_stats_get([("foo", "git")]) == [ + expected_visit_stats + ]