diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py --- a/swh/scheduler/journal_client.py +++ b/swh/scheduler/journal_client.py @@ -120,12 +120,14 @@ ): # we receive an old message which is an earlier "eventful" event # than what we had, we consider the last_eventful event as - # actually an uneventful event. The true eventful message is the - # current one - visit_stats_d["last_uneventful"] = visit_stats_d[ - "last_eventful" - ] - visit_stats_d["last_eventful"] = current_status_date + # actually an uneventful event. + # The last uneventful visit remains the most recent: + # max, previously computed + visit_stats_d["last_uneventful"] = latest_recorded_visit_date + # The eventful visit remains the oldest one: min + visit_stats_d["last_eventful"] = min( + visit_stats_d["last_eventful"], current_status_date + ) elif ( latest_recorded_visit_date and current_status_date == latest_recorded_visit_date diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py --- a/swh/scheduler/tests/test_journal_client.py +++ b/swh/scheduler/tests/test_journal_client.py @@ -26,6 +26,7 @@ ONE_DAY = datetime.timedelta(days=1) +ONE_YEAR = datetime.timedelta(days=366) DATE3 = utcnow() DATE2 = DATE3 - ONE_DAY @@ -664,3 +665,62 @@ assert swh_scheduler.origin_visit_stats_get([("foo", "git")]) == [ expected_visit_stats ] + + +VISIT_STATUSES_SAME_SNAPSHOT = [ + {**ovs, "date": DATE1 + n * ONE_YEAR} + for n, ovs in enumerate( + [ + { + "origin": "cavabarder", + "type": "hg", + "visit": 3, + "status": "full", + "snapshot": hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), + }, + { + "origin": "cavabarder", + "type": "hg", + "visit": 4, + "status": "full", + "snapshot": hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), + }, + { + "origin": "cavabarder", + "type": "hg", + "visit": 4, + "status": "full", + "snapshot": hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), + }, + ] + ) +] + + 
+@pytest.mark.parametrize(
+    "visit_statuses",
+    permutations(VISIT_STATUSES_SAME_SNAPSHOT, len(VISIT_STATUSES_SAME_SNAPSHOT)),
+)
+def test_journal_client_origin_visit_statuses_same_snapshot_permutation(
+    visit_statuses, swh_scheduler
+):
+    """Ensure out-of-order visit statuses sharing one snapshot reach the same state.
+
+    Each ordering of VISIT_STATUSES_SAME_SNAPSHOT must yield identical visit stats."""
+    process_journal_objects(
+        {"origin_visit_status": visit_statuses}, scheduler=swh_scheduler
+    )
+
+    expected_visit_stats = OriginVisitStats(
+        url="cavabarder",
+        visit_type="hg",
+        last_eventful=DATE1,  # earliest status date (n = 0)
+        last_uneventful=DATE1 + 2 * ONE_YEAR,  # latest status date (n = 2)
+        last_failed=None,
+        last_notfound=None,
+        last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
+    )
+
+    assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == [
+        expected_visit_stats
+    ]