diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -108,7 +108,7 @@ class Storage: """SWH storage datastore proxy, encompassing DB and object storage""" - current_version: int = 184 + current_version: int = 185 def __init__( self, diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql --- a/swh/storage/sql/40-funcs.sql +++ b/swh/storage/sql/40-funcs.sql @@ -826,14 +826,14 @@ select id into origin_id from origin where url=origin_url; return query with closest_two_visits as (( - select ov, (date - visit_date), visit as interval + select ov, (date - visit_date) as interval, visit from origin_visit ov where ov.origin = origin_id and ov.date >= visit_date order by ov.date asc, ov.visit desc limit 1 ) union ( - select ov, (visit_date - date), visit as interval + select ov, (visit_date - date) as interval, visit from origin_visit ov where ov.origin = origin_id and ov.date < visit_date diff --git a/swh/storage/sql/upgrades/185.sql b/swh/storage/sql/upgrades/185.sql new file mode 100644 --- /dev/null +++ b/swh/storage/sql/upgrades/185.sql @@ -0,0 +1,35 @@ +-- SWH DB schema upgrade +-- from_version: 184 +-- to_version: 185 +-- description: origin_visit_find_by_date: Fix invalid alias in query which could lead to wrong visit being returned + +insert into dbversion(version, release, description) + values(185, now(), 'Work In Progress'); + +create or replace function swh_visit_find_by_date(origin_url text, visit_date timestamptz default NOW()) + returns setof origin_visit + language plpgsql + stable +as $$ +declare + origin_id bigint; +begin + select id into origin_id from origin where url=origin_url; + return query + with closest_two_visits as (( + select ov, (date - visit_date) as interval, visit + from origin_visit ov + where ov.origin = origin_id + and ov.date >= visit_date + order by ov.date asc, ov.visit desc + limit 1 + ) union ( + select ov, (visit_date - date) as interval, visit + from origin_visit ov + where ov.origin = origin_id + and ov.date < visit_date + order by ov.date desc, ov.visit desc + limit 1 + )) select (ov).* from closest_two_visits order by interval, visit limit 1; +end +$$; \ No newline at end of file diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -3276,6 +3276,30 @@ ) assert actual_visit == ov3 + def test_origin_visit_find_by_date_latest_visit(self, swh_storage, sample_data): + first_visit_date = sample_data.date_visit2 + second_visit_date = first_visit_date + timedelta(days=10) + + origin = sample_data.origin + swh_storage.origin_add([origin]) + visit1 = OriginVisit( + origin=origin.url, + date=first_visit_date, + type=sample_data.type_visit2, + ) + visit2 = OriginVisit( + origin=origin.url, + date=second_visit_date, + type=sample_data.type_visit2, + ) + visit1, visit2 = swh_storage.origin_visit_add([visit1, visit2]) + + # should return the second visit + actual_visit = swh_storage.origin_visit_find_by_date( + origin.url, second_visit_date + ) + assert actual_visit == visit2 + def test_origin_visit_find_by_date__unknown_origin(self, swh_storage, sample_data): actual_visit = swh_storage.origin_visit_find_by_date( "foo", sample_data.date_visit2