diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -831,36 +831,11 @@ INSERT into origin_visit_stats AS ovi ({", ".join(insert_cols)}) VALUES %s ON CONFLICT ({", ".join(pk_cols)}) DO UPDATE - SET last_eventful = ( - select max(eventful.date) from (values - (excluded.last_eventful), - (ovi.last_eventful) - ) as eventful(date) - ), - last_uneventful = ( - select max(uneventful.date) from (values - (excluded.last_uneventful), - (ovi.last_uneventful) - ) as uneventful(date) - ), - last_failed = ( - select max(failed.date) from (values - (excluded.last_failed), - (ovi.last_failed) - ) as failed(date) - ), - last_notfound = ( - select max(notfound.date) from (values - (excluded.last_notfound), - (ovi.last_notfound) - ) as notfound(date) - ), - last_snapshot = (select - case - when ovi.last_eventful < excluded.last_eventful then excluded.last_snapshot - else coalesce(ovi.last_snapshot, excluded.last_snapshot) - end - ) + SET last_eventful = coalesce(excluded.last_eventful, ovi.last_eventful), + last_uneventful = coalesce(excluded.last_uneventful, ovi.last_uneventful), + last_failed = coalesce(excluded.last_failed, ovi.last_failed), + last_notfound = coalesce(excluded.last_notfound, ovi.last_notfound), + last_snapshot = coalesce(excluded.last_snapshot, ovi.last_snapshot) """ # noqa try: diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py --- a/swh/scheduler/tests/test_journal_client.py +++ b/swh/scheduler/tests/test_journal_client.py @@ -581,3 +581,48 @@ assert swh_scheduler.origin_visit_stats_get([("foo", "git")]) == [ expected_visit_stats ] + + +def test_journal_client_origin_visit_status_several_upsert(swh_scheduler): + """A second message for an already-seen snapshot must still update the stats + + """ + visit_status1 = { + "origin": "foo", + "visit": 1, + "status": "full", + "date": DATE1, + "type": "git", + "snapshot": hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), 
+ } + + visit_status2 = { + "origin": "foo", + "visit": 1, + "status": "full", + "date": DATE2, + "type": "git", + "snapshot": hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), + } + + process_journal_objects( + {"origin_visit_status": [visit_status2]}, scheduler=swh_scheduler + ) + + process_journal_objects( + {"origin_visit_status": [visit_status1]}, scheduler=swh_scheduler + ) + + expected_visit_stats = OriginVisitStats( + url="foo", + visit_type="git", + last_eventful=DATE1, + last_uneventful=DATE2, + last_failed=None, + last_notfound=None, + last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), + ) + + assert swh_scheduler.origin_visit_stats_get([("foo", "git")]) == [ + expected_visit_stats + ] diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py --- a/swh/scheduler/tests/test_scheduler.py +++ b/swh/scheduler/tests/test_scheduler.py @@ -891,66 +891,6 @@ assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == [visit_stats] assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == [] - def test_origin_visit_stats_upsert_messing_with_time(self, swh_scheduler) -> None: - url = "interesting-origin" - - # Let's play with dates... 
- date2 = utcnow() - date1 = date2 - ONEDAY - date0 = date1 - ONEDAY - assert date0 < date1 < date2 - - snapshot2 = hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd") - snapshot0 = hash_to_bytes("fffcc0710eb6cf9efd5b920a8453e1e07157bfff") - visit_stats0 = OriginVisitStats( - url=url, - visit_type="git", - last_eventful=date2, - last_uneventful=None, - last_failed=None, - last_notfound=None, - last_snapshot=snapshot2, - ) - swh_scheduler.origin_visit_stats_upsert([visit_stats0]) - - actual_visit_stats0 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] - assert actual_visit_stats0 == visit_stats0 - - visit_stats2 = OriginVisitStats( - url=url, - visit_type="git", - last_eventful=None, - last_uneventful=date1, - last_notfound=None, - last_failed=None, - ) - swh_scheduler.origin_visit_stats_upsert([visit_stats2]) - - actual_visit_stats2 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] - assert actual_visit_stats2 == attr.evolve( - actual_visit_stats0, last_uneventful=date1 - ) - - # a past date, what happens? - # date0 < date2 so this ovs should be dismissed - # the "eventful" associated snapshot should be dismissed as well - visit_stats1 = OriginVisitStats( - url=url, - visit_type="git", - last_eventful=date0, - last_uneventful=None, - last_failed=None, - last_notfound=None, - last_snapshot=snapshot0, - ) - swh_scheduler.origin_visit_stats_upsert([visit_stats1]) - - actual_visit_stats1 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] - - assert actual_visit_stats1 == attr.evolve( - actual_visit_stats2, last_eventful=date2 - ) - def test_origin_visit_stats_upsert_batch(self, swh_scheduler) -> None: """Batch upsert is ok""" visit_stats = [