origin_visit_stats upsert: never move dates or the snapshot backwards

Each last_* date is now merged with max(), which ignores NULLs, so an
upsert carrying an older (or missing) date leaves the stored date alone.
last_snapshot follows last_eventful: it is replaced only by a strictly
newer eventful visit, and is filled in when the stored row has no
snapshot yet (e.g. the origin was first recorded through a failed or
uneventful visit, so the stored last_eventful is NULL and the date
comparison yields NULL rather than true).

diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -774,23 +774,35 @@
             )
             VALUES (%s, %s, %s, %s, %s, %s)
             ON CONFLICT (url, visit_type) DO UPDATE
-              SET last_eventful = coalesce(
-                excluded.last_eventful,
-                ovi.last_eventful
+              SET last_eventful = (
+                select max(eventful.date) from (values
+                  (excluded.last_eventful),
+                  (ovi.last_eventful)
+                ) as eventful(date)
               ),
-              last_uneventful = coalesce(
-                excluded.last_uneventful,
-                ovi.last_uneventful
+              last_uneventful = (
+                select max(uneventful.date) from (values
+                  (excluded.last_uneventful),
+                  (ovi.last_uneventful)
+                ) as uneventful(date)
               ),
-              last_failed = coalesce(
-                excluded.last_failed,
-                ovi.last_failed
+              last_failed = (
+                select max(failed.date) from (values
+                  (excluded.last_failed),
+                  (ovi.last_failed)
+                ) as failed(date)
               ),
-              last_snapshot = coalesce(
-                excluded.last_snapshot,
-                ovi.last_snapshot
+              last_snapshot = (select
+                case
+                  -- strictly newer eventful visit: its snapshot wins
+                  when ovi.last_eventful < excluded.last_eventful then excluded.last_snapshot
+                  -- older or same-date visit, or a NULL date on either side
+                  -- (the comparison then yields NULL): keep the stored
+                  -- snapshot, filling it in only if none was recorded yet
+                  else coalesce(ovi.last_snapshot, excluded.last_snapshot)
+                end
               )
-        """
+        """  # noqa
 
         cur.execute(
             query,
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -841,3 +841,96 @@
 
         assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats
         assert swh_scheduler.origin_visit_stats_get(url, "svn") is None
+
+    def test_origin_visit_stats_upsert_messing_with_time(self, swh_scheduler) -> None:
+        """Upserting older visit dates must not overwrite newer stored values."""
+        url = "interesting-origin"
+
+        # Let's play with dates...
+        date2 = utcnow()
+        date1 = date2 - ONEDAY
+        assert date1 < date2
+        date0 = date1 - ONEDAY
+        assert date0 < date1
+
+        snapshot2 = hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd")
+        snapshot0 = hash_to_bytes("fffcc0710eb6cf9efd5b920a8453e1e07157bfff")
+        visit_stats0 = OriginVisitStats(
+            url=url,
+            visit_type="git",
+            last_eventful=date2,
+            last_uneventful=None,
+            last_failed=None,
+            last_snapshot=snapshot2,
+        )
+        swh_scheduler.origin_visit_stats_upsert(visit_stats0)
+
+        actual_visit_stats0 = swh_scheduler.origin_visit_stats_get(url, "git")
+        assert actual_visit_stats0 == visit_stats0
+
+        visit_stats2 = OriginVisitStats(
+            url=url,
+            visit_type="git",
+            last_eventful=None,
+            last_uneventful=date1,
+            last_failed=None,
+        )
+        swh_scheduler.origin_visit_stats_upsert(visit_stats2)
+
+        actual_visit_stats2 = swh_scheduler.origin_visit_stats_get(url, "git")
+        assert actual_visit_stats2 == attr.evolve(
+            actual_visit_stats0, last_uneventful=date1
+        )
+
+        # a past date, what happens?
+        # date0 < date2 so this ovs should be dismissed
+        # the "eventful" associated snapshot should be dismissed as well
+        visit_stats1 = OriginVisitStats(
+            url=url,
+            visit_type="git",
+            last_eventful=date0,
+            last_uneventful=None,
+            last_failed=None,
+            last_snapshot=snapshot0,
+        )
+        swh_scheduler.origin_visit_stats_upsert(visit_stats1)
+
+        actual_visit_stats1 = swh_scheduler.origin_visit_stats_get(url, "git")
+
+        assert actual_visit_stats1 == attr.evolve(
+            actual_visit_stats2, last_eventful=date2
+        )
+
+    def test_origin_visit_stats_upsert_first_eventful(self, swh_scheduler) -> None:
+        """The first eventful visit of an already-known origin stores its snapshot."""
+        url = "another-interesting-origin"
+
+        date1 = utcnow()
+        date0 = date1 - ONEDAY
+        snapshot1 = hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd")
+
+        # the origin is first recorded through a failed visit: the stored row
+        # has neither a last_eventful date nor a snapshot
+        swh_scheduler.origin_visit_stats_upsert(
+            OriginVisitStats(
+                url=url,
+                visit_type="git",
+                last_eventful=None,
+                last_uneventful=None,
+                last_failed=date0,
+            )
+        )
+
+        eventful_stats = OriginVisitStats(
+            url=url,
+            visit_type="git",
+            last_eventful=date1,
+            last_uneventful=None,
+            last_failed=None,
+            last_snapshot=snapshot1,
+        )
+        swh_scheduler.origin_visit_stats_upsert(eventful_stats)
+
+        assert swh_scheduler.origin_visit_stats_get(url, "git") == attr.evolve(
+            eventful_stats, last_failed=date0
+        )