diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -385,8 +385,6 @@ object_type: Final = "timestamp_with_timezone" timestamp = attr.ib(type=Timestamp, validator=type_validator()) - offset = attr.ib(type=int, validator=type_validator()) - negative_utc = attr.ib(type=bool, validator=type_validator()) offset_bytes = attr.ib(type=bytes, validator=type_validator()) """Raw git representation of the timezone, as an offset from UTC. @@ -397,64 +395,40 @@ original objects, so it may differ from this format when they do. """ - @offset.validator - def check_offset(self, attribute, value): - """Checks the offset is a 16-bits signed integer (in theory, it - should always be between -14 and +14 hours).""" - if not (-(2 ** 15) <= value < 2 ** 15): - # max 14 hours offset in theory, but you never know what - # you'll find in the wild... - raise ValueError("offset too large: %d minutes" % value) - - self._check_offsets_match() - - @negative_utc.validator - def check_negative_utc(self, attribute, value): - if self.offset and value: - raise ValueError("negative_utc can only be True is offset=0") - - self._check_offsets_match() - - @offset_bytes.default - def _default_offset_bytes(self): - negative = self.offset < 0 or self.negative_utc - (hours, minutes) = divmod(abs(self.offset), 60) - return f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode() - - @offset_bytes.validator - def check_offset_bytes(self, attribute, value): - if not set(value) <= _OFFSET_CHARS: - raise ValueError(f"invalid characters in offset_bytes: {value!r}") - - self._check_offsets_match() - - def _check_offsets_match(self): + @property + def offset(self) -> int: + """Parsed value of :attr:`offset_bytes` as a number of minutes, + or ``0`` if it cannot be parsed. + """ offset_str = self.offset_bytes.decode() assert offset_str[0] in "+-" sign = int(offset_str[0] + "1") hours = int(offset_str[1:-2]) minutes = int(offset_str[-2:]) - offset = sign * (hours * 60 + minutes) - if offset != self.offset: - raise ValueError( - f"offset_bytes ({self.offset_bytes!r}) does not match offset " - f"{divmod(self.offset, 60)}" - ) + return sign * (hours * 60 + minutes) - if offset == 0 and self.negative_utc != self.offset_bytes.startswith(b"-"): - raise ValueError( - f"offset_bytes ({self.offset_bytes!r}) does not match negative_utc " - f"({self.negative_utc})" - ) + @classmethod + def from_numeric_offset( + cls, timestamp: Timestamp, offset: int, negative_utc: bool + ) -> "TimestampWithTimezone": + """Returns a :class:`TimestampWithTimezone` instance from the old dictionary + format (with ``offset`` and ``negative_utc`` instead of ``offset_bytes``). + """ + negative = offset < 0 or negative_utc + (hours, minutes) = divmod(abs(offset), 60) + offset_bytes = f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode() + tstz = TimestampWithTimezone(timestamp=timestamp, offset_bytes=offset_bytes) + assert tstz.offset == offset, (tstz.offset, offset) + return tstz @classmethod - def from_dict(cls, time_representation: Union[Dict, datetime.datetime, int]): + def from_dict( + cls, time_representation: Union[Dict, datetime.datetime, int] + ) -> "TimestampWithTimezone": """Builds a TimestampWithTimezone from any of the formats accepted by :func:`swh.model.normalize_timestamp`.""" # TODO: this accept way more types than just dicts; find a better # name - negative_utc = False - if isinstance(time_representation, dict): ts = time_representation["timestamp"] if isinstance(ts, dict): @@ -468,12 +442,21 @@ f"TimestampWithTimezone.from_dict received non-integer timestamp " f"member {ts!r}" ) - offset = time_representation["offset"] - if "negative_utc" in time_representation: - negative_utc = time_representation["negative_utc"] - if negative_utc is None: - negative_utc = False + + timestamp = Timestamp(seconds=seconds, microseconds=microseconds) + + if "offset_bytes" in time_representation: + return cls( + timestamp=timestamp, + offset_bytes=time_representation["offset_bytes"], + ) + else: + # old format + offset = time_representation["offset"] + negative_utc = time_representation.get("negative_utc") or False + return cls.from_numeric_offset(timestamp, offset, negative_utc) elif isinstance(time_representation, datetime.datetime): + # TODO: warn when using from_dict() on a datetime utcoffset = time_representation.utcoffset() time_representation = time_representation.astimezone(datetime.timezone.utc) microseconds = time_representation.microsecond @@ -489,33 +472,30 @@ # utcoffset is an integer number of minutes seconds_offset = utcoffset.total_seconds() offset = int(seconds_offset) // 60 + # TODO: warn if remainder is not zero + return cls.from_numeric_offset( + Timestamp(seconds=seconds, microseconds=microseconds), offset, False + ) elif isinstance(time_representation, int): + # TODO: warn when using from_dict() on an int seconds = time_representation - microseconds = 0 - offset = 0 + timestamp = Timestamp(seconds=time_representation, microseconds=0) + return cls(timestamp=timestamp, offset_bytes=b"+0000") else: raise ValueError( f"TimestampWithTimezone.from_dict received non-integer timestamp: " f"{time_representation!r}" ) - return cls( - timestamp=Timestamp(seconds=seconds, microseconds=microseconds), - offset=offset, - negative_utc=negative_utc, - ) - @classmethod - def from_datetime(cls, dt: datetime.datetime): + def from_datetime(cls, dt: datetime.datetime) -> "TimestampWithTimezone": return cls.from_dict(dt) def to_datetime(self) -> datetime.datetime: """Convert to a datetime (with a timezone set to the recorded fixed UTC offset) - Beware that this conversion can be lossy: the negative_utc flag is not - taken into consideration (since it cannot be represented in a - datetime). Also note that it may fail due to type overflow. - + Beware that this conversion can be lossy: ``-0000`` and 'weird' offsets + cannot be represented. Also note that it may fail due to type overflow. """ timestamp = datetime.datetime.fromtimestamp( self.timestamp.seconds, @@ -532,7 +512,7 @@ tstz = cls.from_datetime(dt) if dt.tzname() == "-00:00": assert tstz.offset_bytes == b"+0000" - tstz = attr.evolve(tstz, negative_utc=True, offset_bytes=b"-0000") + tstz = attr.evolve(tstz, offset_bytes=b"-0000") return tstz diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py --- a/swh/model/tests/swh_model_data.py +++ b/swh/model/tests/swh_model_data.py @@ -91,14 +91,10 @@ DATES = [ TimestampWithTimezone( - timestamp=Timestamp(seconds=1234567891, microseconds=0,), - offset=120, - negative_utc=False, + timestamp=Timestamp(seconds=1234567891, microseconds=0,), offset_bytes=b"+0200", ), TimestampWithTimezone( - timestamp=Timestamp(seconds=1234567892, microseconds=0,), - offset=120, - negative_utc=False, + timestamp=Timestamp(seconds=1234567892, microseconds=0,), offset_bytes=b"+0200", ), ] @@ -152,8 +148,7 @@ name=b"v0.0.1", date=TimestampWithTimezone( timestamp=Timestamp(seconds=1234567890, microseconds=0,), - offset=120, - negative_utc=False, + offset_bytes=b"+0200", ), author=COMMITTERS[0], target_type=ObjectType.REVISION, diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -279,11 +279,7 @@ "name": b"Software Heritage", "email": b"robot@softwareheritage.org", }, - "date": { - "timestamp": {"seconds": 1437047495}, - "offset": 0, - "negative_utc": False, - }, + "date": {"timestamp": {"seconds": 1437047495}, "offset_bytes": b"+0000",}, "type": "tar", "committer": { "name": b"Software Heritage", @@ -549,11 +545,7 @@ "name": b"20081029", "target": _x("54e9abca4c77421e2921f5f156c9fe4a9f7441c7"), "target_type": "revision", - "date": { - "timestamp": {"seconds": 1225281976}, - "offset": 0, - "negative_utc": True, - }, + "date": {"timestamp": {"seconds": 1225281976}, "offset_bytes": b"-0000",}, "author": {"name": b"Otavio Salvador", "email": b"otavio@debian.org",}, "synthetic": False, "message": b"tagging version 20081029\n\nr56558\n", @@ -566,8 +558,7 @@ "name": b"Eugene Janusov\n", }, "date": { - "negative_utc": None, - "offset": 600, + "offset_bytes": b"+1000", "timestamp": {"microseconds": 0, "seconds": 1377480558,}, }, "id": _x("5c98f559d034162de22d3ebeb95433e6f8885231"), @@ -1029,51 +1020,66 @@ ) +# Format: [ +# ( +# input1, +# expected_output1, +# ), +# ( +# input2, +# expected_output2, +# ), +# ... +# ] TS_DICTS = [ + # with current input dict format (offset_bytes) ( - {"timestamp": 12345, "offset": 0}, + {"timestamp": 12345, "offset_bytes": b"+0000"}, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + ), + ( + {"timestamp": 12345, "offset_bytes": b"-0000"}, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0000",}, + ), + ( + {"timestamp": 12345, "offset_bytes": b"+0200"}, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0200",}, + ), + ( + {"timestamp": 12345, "offset_bytes": b"-0200"}, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0200",}, + ), + ( + {"timestamp": 12345, "offset_bytes": b"--700"}, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"--700",}, + ), + ( + {"timestamp": 12345, "offset_bytes": b"1234567"}, { "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": False, - "offset_bytes": b"+0000", + "offset_bytes": b"1234567", }, ), + # with old-style input dicts (numeric offset + optional negative_utc): + ( + {"timestamp": 12345, "offset": 0}, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, + ), ( {"timestamp": 12345, "offset": 0, "negative_utc": False}, - { - "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": False, - "offset_bytes": b"+0000", - }, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": False}, - { - "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": False, - "offset_bytes": b"+0000", - }, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": None}, - { - "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": False, - "offset_bytes": b"+0000", - }, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, ), ( {"timestamp": {"seconds": 12345}, "offset": 0, "negative_utc": None}, - { - "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": False, - "offset_bytes": b"+0000", - }, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, ), ( { @@ -1081,12 +1087,7 @@ "offset": 0, "negative_utc": None, }, - { - "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": False, - "offset_bytes": b"+0000", - }, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, ), ( { @@ -1096,28 +1097,16 @@ }, { "timestamp": {"seconds": 12345, "microseconds": 100}, - "offset": 0, - "negative_utc": False, "offset_bytes": b"+0000", }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": True}, - { - "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": True, - "offset_bytes": b"-0000", - }, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"-0000",}, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": None}, - { - "timestamp": {"seconds": 12345, "microseconds": 0}, - "offset": 0, - "negative_utc": False, - "offset_bytes": b"+0000", - }, + {"timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000",}, ), ] @@ -1172,8 +1161,6 @@ date = date.astimezone(tz).replace(microsecond=microsecond) assert TimestampWithTimezone.from_dict(date).to_dict() == { "timestamp": {"seconds": seconds, "microseconds": microsecond}, - "offset": offset, - "negative_utc": False, "offset_bytes": offset_bytes, } diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -450,53 +450,41 @@ def test_timestampwithtimezone(): ts = Timestamp(seconds=0, microseconds=0) - tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0000") attr.validate(tstz) - assert tstz.negative_utc is False + assert tstz.offset == 0 assert tstz.offset_bytes == b"+0000" - tstz = TimestampWithTimezone(timestamp=ts, offset=10, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0010") attr.validate(tstz) + assert tstz.offset == 10 assert tstz.offset_bytes == b"+0010" - tstz = TimestampWithTimezone(timestamp=ts, offset=-10, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0010") attr.validate(tstz) + assert tstz.offset == -10 assert tstz.offset_bytes == b"-0010" - tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=True) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0000") attr.validate(tstz) - assert tstz.negative_utc is True + assert tstz.offset == 0 assert tstz.offset_bytes == b"-0000" - tstz = TimestampWithTimezone(timestamp=ts, offset=-630, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-1030") attr.validate(tstz) - assert tstz.negative_utc is False + assert tstz.offset == -630 assert tstz.offset_bytes == b"-1030" - tstz = TimestampWithTimezone(timestamp=ts, offset=800, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+1320") attr.validate(tstz) - assert tstz.negative_utc is False + assert tstz.offset == 800 assert tstz.offset_bytes == b"+1320" with pytest.raises(AttributeTypeError): - TimestampWithTimezone( - timestamp=datetime.datetime.now(), offset=0, negative_utc=False - ) + TimestampWithTimezone(timestamp=datetime.datetime.now(), offset_bytes=b"+0000") with pytest.raises((AttributeTypeError, TypeError)): - TimestampWithTimezone(timestamp=ts, offset="0", negative_utc=False) - - with pytest.raises(AttributeTypeError): - TimestampWithTimezone(timestamp=ts, offset=1.0, negative_utc=False) - - with pytest.raises(AttributeTypeError): - TimestampWithTimezone(timestamp=ts, offset=1, negative_utc=0) - - with pytest.raises(ValueError): - TimestampWithTimezone(timestamp=ts, offset=1, negative_utc=True) - - with pytest.raises(ValueError): - TimestampWithTimezone(timestamp=ts, offset=-1, negative_utc=True) + TimestampWithTimezone(timestamp=ts, offset_bytes=0) def test_timestampwithtimezone_from_datetime(): @@ -505,9 +493,7 @@ date = datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=1582810759, microseconds=0,), - offset=60, - negative_utc=False, + timestamp=Timestamp(seconds=1582810759, microseconds=0,), offset_bytes=b"+0100" ) # Typical case (close to epoch) @@ -515,7 +501,7 @@ date = datetime.datetime(1970, 1, 1, 1, 0, 5, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=5, microseconds=0,), offset=60, negative_utc=False, + timestamp=Timestamp(seconds=5, microseconds=0,), offset_bytes=b"+0100" ) # non-integer number of seconds before UNIX epoch @@ -524,9 +510,7 @@ ) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=-1, microseconds=100000,), - offset=0, - negative_utc=False, + timestamp=Timestamp(seconds=-1, microseconds=100000,), offset_bytes=b"+0000" ) # non-integer number of seconds in both the timestamp and the offset @@ -534,9 +518,7 @@ date = datetime.datetime(1969, 12, 31, 23, 59, 59, 600000, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=0, microseconds=200000,), - offset=0, - negative_utc=False, + timestamp=Timestamp(seconds=0, microseconds=200000,), offset_bytes=b"+0000" ) # timezone offset with non-integer number of seconds, for dates before epoch @@ -546,9 +528,7 @@ date = datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=-1, microseconds=100000,), - offset=0, - negative_utc=False, + timestamp=Timestamp(seconds=-1, microseconds=100000,), offset_bytes=b"+0000" ) @@ -566,8 +546,7 @@ assert tstz == TimestampWithTimezone( timestamp=Timestamp(seconds=1582810759, microseconds=123456,), - offset=60, - negative_utc=False, + offset_bytes=b"+0100", ) @@ -577,9 +556,7 @@ tstz = TimestampWithTimezone.from_iso8601(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=1582810759, microseconds=0,), - offset=0, - negative_utc=True, + timestamp=Timestamp(seconds=1582810759, microseconds=0,), offset_bytes=b"-0000" )