diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py --- a/swh/storage/postgresql/converters.py +++ b/swh/storage/postgresql/converters.py @@ -1,9 +1,10 @@ -# Copyright (C) 2015-2020 The Software Heritage developers +# Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime +import math from typing import Any, Dict, Optional import warnings @@ -106,7 +107,9 @@ return TimestampWithTimezone( timestamp=Timestamp( - seconds=int(date.timestamp()), microseconds=date.microsecond, + # we use floor() instead of int() to round down, because of negative dates + seconds=math.floor(date.timestamp()), + microseconds=date.microsecond, ), offset=offset, negative_utc=neg_utc_offset, diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -133,7 +133,15 @@ if self._db: return self._db else: - return Db.from_pool(self._pool) + db = Db.from_pool(self._pool) + + # Workaround for psycopg2 < 2.9.0 not handling fractional timezones, + # which may happen on old revision/release dates on systems configured + # with non-UTC timezones. 
+ # https://www.psycopg.org/docs/usage.html#time-zones-handling + db.cursor().execute("SET TIME ZONE 'UTC'") + + return db def put_db(self, db): if db is not self._db: diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -37,6 +37,8 @@ Snapshot, SnapshotBranch, TargetType, + Timestamp, + TimestampWithTimezone, ) from swh.model.swhids import CoreSWHID, ObjectType from swh.storage import get_storage @@ -1017,6 +1019,43 @@ ("revision", revision2), ] + def test_revision_add_fractional_timezone(self, swh_storage, sample_data): + # When reading dates from this time period on systems configured with + # timezone Europe/Paris, postgresql returns them with UTC+00:09:21 as timezone, + # but psycopg2 < 2.9.0 had to truncate them. + # https://www.psycopg.org/docs/usage.html#time-zones-handling + # + # There is a workaround in swh.storage.postgresql.storage.Storage.get_db, + # to set the timezone to UTC so it works on all psycopg2 versions. + # + # Therefore, this test always succeeds in tox (because psycopg2 >= 2.9.0) + # and on the CI (both because psycopg2 >= 2.9.0 and TZ=UTC); which means + # this test is only useful on machines with older psycopg2 versions and + # TZ=Europe/Paris. But the workaround is also only needed on this kind of + # configuration, so this is good enough. 
+ revision = attr.evolve( + sample_data.revision, + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=-1855958962, microseconds=0), + offset=0, + negative_utc=False, + ), + ) + init_missing = swh_storage.revision_missing([revision.id]) + assert list(init_missing) == [revision.id] + + actual_result = swh_storage.revision_add([revision]) + assert actual_result == {"revision:add": 1} + + end_missing = swh_storage.revision_missing([revision.id]) + assert list(end_missing) == [] + + assert list(swh_storage.journal_writer.journal.objects) == [ + ("revision", revision) + ] + + assert swh_storage.revision_get([revision.id])[0] == revision + def test_revision_add_name_clash(self, swh_storage, sample_data): revision, revision2 = sample_data.revisions[:2] diff --git a/swh/storage/tests/test_postgresql_converters.py b/swh/storage/tests/test_postgresql_converters.py --- a/swh/storage/tests/test_postgresql_converters.py +++ b/swh/storage/tests/test_postgresql_converters.py @@ -72,6 +72,78 @@ "neg_utc_offset": False, }, ), + ( + TimestampWithTimezone( + timestamp=Timestamp(seconds=0, microseconds=0,), + offset=-120, + negative_utc=False, + ), + { + "timestamp": "1970-01-01T00:00:00+00:00", + "offset": -120, + "neg_utc_offset": False, + }, + ), + ( + TimestampWithTimezone( + timestamp=Timestamp(seconds=0, microseconds=1,), + offset=-120, + negative_utc=False, + ), + { + "timestamp": "1970-01-01T00:00:00.000001+00:00", + "offset": -120, + "neg_utc_offset": False, + }, + ), + ( + TimestampWithTimezone( + timestamp=Timestamp(seconds=-1, microseconds=0,), + offset=-120, + negative_utc=False, + ), + { + "timestamp": "1969-12-31T23:59:59+00:00", + "offset": -120, + "neg_utc_offset": False, + }, + ), + ( + TimestampWithTimezone( + timestamp=Timestamp(seconds=-1, microseconds=1,), + offset=-120, + negative_utc=False, + ), + { + "timestamp": "1969-12-31T23:59:59.000001+00:00", + "offset": -120, + "neg_utc_offset": False, + }, + ), + ( + TimestampWithTimezone( 
+ timestamp=Timestamp(seconds=-3600, microseconds=0,), + offset=-120, + negative_utc=False, + ), + { + "timestamp": "1969-12-31T23:00:00+00:00", + "offset": -120, + "neg_utc_offset": False, + }, + ), + ( + TimestampWithTimezone( + timestamp=Timestamp(seconds=-3600, microseconds=1,), + offset=-120, + negative_utc=False, + ), + { + "timestamp": "1969-12-31T23:00:00.000001+00:00", + "offset": -120, + "neg_utc_offset": False, + }, + ), ], ) def test_date(model_date, db_date):