diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -379,12 +379,11 @@ @attr.s(frozen=True, slots=True) -class TimestampWithTimezone(BaseModel): +class _TimestampWithTimezone(BaseModel): """Represents a TZ-aware timestamp from a VCS.""" - object_type: Final = "timestamp_with_timezone" - timestamp = attr.ib(type=Timestamp, validator=type_validator()) + offset = attr.ib(type=int, validator=type_validator()) negative_utc = attr.ib(type=bool, validator=type_validator()) @@ -415,12 +414,6 @@ self._check_offsets_match() - @offset_bytes.default - def _default_offset_bytes(self): - negative = self.offset < 0 or self.negative_utc - (hours, minutes) = divmod(abs(self.offset), 60) - return f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode() - @offset_bytes.validator def check_offset_bytes(self, attribute, value): if not set(value) <= _OFFSET_CHARS: @@ -428,13 +421,18 @@ self._check_offsets_match() - def _check_offsets_match(self): - offset_str = self.offset_bytes.decode() + @staticmethod + def _parse_offset_bytes(offset_bytes: bytes): + offset_str = offset_bytes.decode() assert offset_str[0] in "+-" sign = int(offset_str[0] + "1") hours = int(offset_str[1:-2]) minutes = int(offset_str[-2:]) offset = sign * (hours * 60 + minutes) + return offset + + def _check_offsets_match(self): + offset = self._parse_offset_bytes(self.offset_bytes) if offset != self.offset: raise ValueError( f"offset_bytes ({self.offset_bytes!r}) does not match offset " @@ -448,13 +446,32 @@ ) @classmethod - def from_dict(cls, time_representation: Union[Dict, datetime.datetime, int]): + def from_numeric_offset( + cls, timestamp: Timestamp, offset: int, negative_utc: bool + ) -> "TimestampWithTimezone": + """Returns a :class:`TimestampWithTimezone` instance from the old dictionary + format (with ``offset`` and ``negative_utc`` instead of ``offset_bytes``). + """ + negative = offset < 0 or negative_utc + (hours, minutes) = divmod(abs(offset), 60) + offset_bytes = f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode() + tstz = TimestampWithTimezone( + timestamp=timestamp, + offset_bytes=offset_bytes, + offset=offset, + negative_utc=negative_utc, + ) + assert tstz.offset == offset, (tstz.offset, offset) + return tstz + + @classmethod + def from_dict( + cls, time_representation: Union[Dict, datetime.datetime, int] + ) -> "TimestampWithTimezone": """Builds a TimestampWithTimezone from any of the formats accepted by :func:`swh.model.normalize_timestamp`.""" # TODO: this accept way more types than just dicts; find a better # name - negative_utc = False - if isinstance(time_representation, dict): ts = time_representation["timestamp"] if isinstance(ts, dict): @@ -468,12 +485,21 @@ f"TimestampWithTimezone.from_dict received non-integer timestamp " f"member {ts!r}" ) - offset = time_representation["offset"] - if "negative_utc" in time_representation: - negative_utc = time_representation["negative_utc"] - if negative_utc is None: - negative_utc = False + + timestamp = Timestamp(seconds=seconds, microseconds=microseconds) + + if "offset_bytes" in time_representation: + return TimestampWithTimezone( + timestamp=timestamp, + offset_bytes=time_representation["offset_bytes"], + ) + else: + # old format + offset = time_representation["offset"] + negative_utc = time_representation.get("negative_utc") or False + return cls.from_numeric_offset(timestamp, offset, negative_utc) elif isinstance(time_representation, datetime.datetime): + # TODO: warn when using from_dict() on a datetime utcoffset = time_representation.utcoffset() time_representation = time_representation.astimezone(datetime.timezone.utc) microseconds = time_representation.microsecond @@ -489,33 +515,30 @@ # utcoffset is an integer number of minutes seconds_offset = utcoffset.total_seconds() offset = int(seconds_offset) // 60 + # TODO: warn if remainder is not zero + return cls.from_numeric_offset( + Timestamp(seconds=seconds, microseconds=microseconds), offset, False + ) elif isinstance(time_representation, int): + # TODO: warn when using from_dict() on an int seconds = time_representation - microseconds = 0 - offset = 0 + timestamp = Timestamp(seconds=time_representation, microseconds=0) + return TimestampWithTimezone(timestamp=timestamp, offset_bytes=b"+0000") else: raise ValueError( f"TimestampWithTimezone.from_dict received non-integer timestamp: " f"{time_representation!r}" ) - return cls( - timestamp=Timestamp(seconds=seconds, microseconds=microseconds), - offset=offset, - negative_utc=negative_utc, - ) - @classmethod - def from_datetime(cls, dt: datetime.datetime): + def from_datetime(cls, dt: datetime.datetime) -> "TimestampWithTimezone": return cls.from_dict(dt) def to_datetime(self) -> datetime.datetime: """Convert to a datetime (with a timezone set to the recorded fixed UTC offset) - Beware that this conversion can be lossy: the negative_utc flag is not - taken into consideration (since it cannot be represented in a - datetime). Also note that it may fail due to type overflow. - + Beware that this conversion can be lossy: ``-0000`` and 'weird' offsets + cannot be represented. Also note that it may fail due to type overflow. """ timestamp = datetime.datetime.fromtimestamp( self.timestamp.seconds, @@ -532,10 +555,44 @@ tstz = cls.from_datetime(dt) if dt.tzname() == "-00:00": assert tstz.offset_bytes == b"+0000" - tstz = attr.evolve(tstz, negative_utc=True, offset_bytes=b"-0000") + tstz = attr.evolve(tstz, offset_bytes=b"-0000", negative_utc=True) return tstz +# Temporary hack to support both (offset, negative_utc) and offset_bytes being optional, +# without __attrs_init__ (depends on attrs >= 21.1.0) +class TimestampWithTimezone(_TimestampWithTimezone): + object_type: Final = "timestamp_with_timezone" + + def __init__( + self, + timestamp: Timestamp, + offset: int = None, + negative_utc: bool = None, + offset_bytes: bytes = None, + ): + if offset_bytes is None: + if offset is None: + raise AttributeError("Neither 'offset' nor 'offset_bytes' was passed.") + if negative_utc is None: + raise AttributeError("Neither 'offset' nor 'offset_bytes' was passed.") + negative = offset < 0 or negative_utc + (hours, minutes) = divmod(abs(offset), 60) + offset_bytes = f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode() + else: + offset = self._parse_offset_bytes(offset_bytes) + negative_utc = offset == 0 and offset_bytes.startswith(b"-") + + assert offset is not None and negative_utc is not None # thanks mypy... + + super().__init__( + timestamp=timestamp, + offset=offset, + negative_utc=negative_utc, + offset_bytes=offset_bytes, + ) + + @attr.s(frozen=True, slots=True) class Origin(HashableObject, BaseModel): """Represents a software source: a VCS and an URL.""" diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py --- a/swh/model/tests/swh_model_data.py +++ b/swh/model/tests/swh_model_data.py @@ -91,14 +91,10 @@ DATES = [ TimestampWithTimezone( - timestamp=Timestamp(seconds=1234567891, microseconds=0,), - offset=120, - negative_utc=False, + timestamp=Timestamp(seconds=1234567891, microseconds=0,), offset_bytes=b"+0200", ), TimestampWithTimezone( - timestamp=Timestamp(seconds=1234567892, microseconds=0,), - offset=120, - negative_utc=False, + timestamp=Timestamp(seconds=1234567892, microseconds=0,), offset_bytes=b"+0200", ), ] @@ -152,8 +148,7 @@ name=b"v0.0.1", date=TimestampWithTimezone( timestamp=Timestamp(seconds=1234567890, microseconds=0,), - offset=120, - negative_utc=False, + offset_bytes=b"+0200", ), author=COMMITTERS[0], target_type=ObjectType.REVISION, diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -21,6 +21,7 @@ Release, Revision, Snapshot, + Timestamp, TimestampWithTimezone, ) @@ -279,11 +280,7 @@ "name": b"Software Heritage", "email": b"robot@softwareheritage.org", }, - "date": { - "timestamp": {"seconds": 1437047495}, - "offset": 0, - "negative_utc": False, - }, + "date": {"timestamp": {"seconds": 1437047495}, "offset_bytes": b"+0000",}, "type": "tar", "committer": { "name": b"Software Heritage", @@ -549,11 +546,7 @@ "name": b"20081029", "target": _x("54e9abca4c77421e2921f5f156c9fe4a9f7441c7"), "target_type": "revision", - "date": { - "timestamp": {"seconds": 1225281976}, - "offset": 0, - "negative_utc": True, - }, + "date": {"timestamp": {"seconds": 1225281976}, "offset_bytes": b"-0000",}, "author": {"name": b"Otavio Salvador", "email": b"otavio@debian.org",}, "synthetic": False, "message": b"tagging version 20081029\n\nr56558\n", @@ -566,8 +559,7 @@ "name": b"Eugene Janusov\n", }, "date": { - "negative_utc": None, - "offset": 600, + "offset_bytes": b"+1000", "timestamp": {"microseconds": 0, "seconds": 1377480558,}, }, "id": _x("5c98f559d034162de22d3ebeb95433e6f8885231"), @@ -1029,50 +1021,118 @@ ) +# Format: [ +# ( +# input1, +# expected_output1, +# ), +# ( +# input2, +# expected_output2, +# ), +# ... +# ] TS_DICTS = [ + # with current input dict format (offset_bytes) ( - {"timestamp": 12345, "offset": 0}, + {"timestamp": 12345, "offset_bytes": b"+0000"}, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", "offset": 0, "negative_utc": False, - "offset_bytes": b"+0000", }, ), ( - {"timestamp": 12345, "offset": 0, "negative_utc": False}, + {"timestamp": 12345, "offset_bytes": b"-0000"}, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"-0000", "offset": 0, + "negative_utc": True, + }, + ), + ( + {"timestamp": 12345, "offset_bytes": b"+0200"}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0200", + "offset": 120, "negative_utc": False, + }, + ), + ( + {"timestamp": 12345, "offset_bytes": b"-0200"}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"-0200", + "offset": -120, + "negative_utc": False, + }, + ), + # not working yet: + # ( + # {"timestamp": 12345, "offset_bytes": b"--700"}, + # { + # "timestamp": {"seconds": 12345, "microseconds": 0}, + # "offset_bytes": b"--700", + # "offset": 0, + # "negative_utc": False, + # }, + # ), + # ( + # {"timestamp": 12345, "offset_bytes": b"1234567"}, + # { + # "timestamp": {"seconds": 12345, "microseconds": 0}, + # "offset_bytes": b"1234567", + # "offset": 0, + # "negative_utc": False, + # }, + # ), + # with old-style input dicts (numeric offset + optional negative_utc): + ( + {"timestamp": 12345, "offset": 0}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": False}, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", "offset": 0, "negative_utc": False, + }, + ), + ( + {"timestamp": 12345, "offset": 0, "negative_utc": False}, + { + "timestamp": {"seconds": 12345, "microseconds": 0}, "offset_bytes": b"+0000", + "offset": 0, + "negative_utc": False, }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": None}, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", "offset": 0, "negative_utc": False, - "offset_bytes": b"+0000", }, ), ( {"timestamp": {"seconds": 12345}, "offset": 0, "negative_utc": None}, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", "offset": 0, "negative_utc": False, - "offset_bytes": b"+0000", }, ), ( @@ -1083,9 +1143,9 @@ }, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", "offset": 0, "negative_utc": False, - "offset_bytes": b"+0000", }, ), ( @@ -1096,27 +1156,27 @@ }, { "timestamp": {"seconds": 12345, "microseconds": 100}, + "offset_bytes": b"+0000", "offset": 0, "negative_utc": False, - "offset_bytes": b"+0000", }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": True}, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"-0000", "offset": 0, "negative_utc": True, - "offset_bytes": b"-0000", }, ), ( {"timestamp": 12345, "offset": 0, "negative_utc": None}, { "timestamp": {"seconds": 12345, "microseconds": 0}, + "offset_bytes": b"+0000", "offset": 0, "negative_utc": False, - "offset_bytes": b"+0000", }, ), ] @@ -1127,6 +1187,35 @@ assert TimestampWithTimezone.from_dict(dict_input).to_dict() == expected +def test_timestampwithtimezone_init(): + ts = Timestamp(seconds=1234567, microseconds=0) + tstz = TimestampWithTimezone( + timestamp=ts, offset=120, negative_utc=False, offset_bytes=b"+0200" + ) + assert tstz.timestamp == ts + assert tstz.offset == 120 + assert tstz.negative_utc is False + assert tstz.offset_bytes == b"+0200" + + assert tstz == TimestampWithTimezone(timestamp=ts, offset=120, negative_utc=False) + assert tstz == TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0200") + + assert tstz != TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0100") + + tstz = TimestampWithTimezone( + timestamp=ts, offset=0, negative_utc=True, offset_bytes=b"-0000" + ) + assert tstz.timestamp == ts + assert tstz.offset == 0 + assert tstz.negative_utc is True + assert tstz.offset_bytes == b"-0000" + + assert tstz == TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=True) + assert tstz == TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0000") + + assert tstz != TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0000") + + TS_DICTS_INVALID_TIMESTAMP = [ {"timestamp": 1.2, "offset": 0}, {"timestamp": "1", "offset": 0}, @@ -1172,9 +1261,9 @@ date = date.astimezone(tz).replace(microsecond=microsecond) assert TimestampWithTimezone.from_dict(date).to_dict() == { "timestamp": {"seconds": seconds, "microseconds": microsecond}, + "offset_bytes": offset_bytes, "offset": offset, "negative_utc": False, - "offset_bytes": offset_bytes, } diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -450,53 +450,41 @@ def test_timestampwithtimezone(): ts = Timestamp(seconds=0, microseconds=0) - tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0000") attr.validate(tstz) - assert tstz.negative_utc is False + assert tstz.offset == 0 assert tstz.offset_bytes == b"+0000" - tstz = TimestampWithTimezone(timestamp=ts, offset=10, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0010") attr.validate(tstz) + assert tstz.offset == 10 assert tstz.offset_bytes == b"+0010" - tstz = TimestampWithTimezone(timestamp=ts, offset=-10, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0010") attr.validate(tstz) + assert tstz.offset == -10 assert tstz.offset_bytes == b"-0010" - tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=True) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0000") attr.validate(tstz) - assert tstz.negative_utc is True + assert tstz.offset == 0 assert tstz.offset_bytes == b"-0000" - tstz = TimestampWithTimezone(timestamp=ts, offset=-630, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-1030") attr.validate(tstz) - assert tstz.negative_utc is False + assert tstz.offset == -630 assert tstz.offset_bytes == b"-1030" - tstz = TimestampWithTimezone(timestamp=ts, offset=800, negative_utc=False) + tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+1320") attr.validate(tstz) - assert tstz.negative_utc is False + assert tstz.offset == 800 assert tstz.offset_bytes == b"+1320" with pytest.raises(AttributeTypeError): - TimestampWithTimezone( - timestamp=datetime.datetime.now(), offset=0, negative_utc=False - ) - - with pytest.raises((AttributeTypeError, TypeError)): - TimestampWithTimezone(timestamp=ts, offset="0", negative_utc=False) + TimestampWithTimezone(timestamp=datetime.datetime.now(), offset_bytes=b"+0000") - with pytest.raises(AttributeTypeError): - TimestampWithTimezone(timestamp=ts, offset=1.0, negative_utc=False) - - with pytest.raises(AttributeTypeError): - TimestampWithTimezone(timestamp=ts, offset=1, negative_utc=0) - - with pytest.raises(ValueError): - TimestampWithTimezone(timestamp=ts, offset=1, negative_utc=True) - - with pytest.raises(ValueError): - TimestampWithTimezone(timestamp=ts, offset=-1, negative_utc=True) + with pytest.raises((AttributeTypeError, AttributeError, TypeError)): + TimestampWithTimezone(timestamp=ts, offset_bytes=0) def test_timestampwithtimezone_from_datetime(): @@ -505,9 +493,7 @@ date = datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=1582810759, microseconds=0,), - offset=60, - negative_utc=False, + timestamp=Timestamp(seconds=1582810759, microseconds=0,), offset_bytes=b"+0100" ) # Typical case (close to epoch) @@ -515,7 +501,7 @@ date = datetime.datetime(1970, 1, 1, 1, 0, 5, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=5, microseconds=0,), offset=60, negative_utc=False, + timestamp=Timestamp(seconds=5, microseconds=0,), offset_bytes=b"+0100" ) # non-integer number of seconds before UNIX epoch @@ -524,9 +510,7 @@ ) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=-1, microseconds=100000,), - offset=0, - negative_utc=False, + timestamp=Timestamp(seconds=-1, microseconds=100000,), offset_bytes=b"+0000" ) # non-integer number of seconds in both the timestamp and the offset @@ -534,9 +518,7 @@ date = datetime.datetime(1969, 12, 31, 23, 59, 59, 600000, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=0, microseconds=200000,), - offset=0, - negative_utc=False, + timestamp=Timestamp(seconds=0, microseconds=200000,), offset_bytes=b"+0000" ) # timezone offset with non-integer number of seconds, for dates before epoch @@ -546,9 +528,7 @@ date = datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=tz) tstz = TimestampWithTimezone.from_datetime(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=-1, microseconds=100000,), - offset=0, - negative_utc=False, + timestamp=Timestamp(seconds=-1, microseconds=100000,), offset_bytes=b"+0000" ) @@ -566,8 +546,7 @@ assert tstz == TimestampWithTimezone( timestamp=Timestamp(seconds=1582810759, microseconds=123456,), - offset=60, - negative_utc=False, + offset_bytes=b"+0100", ) @@ -577,9 +556,7 @@ tstz = TimestampWithTimezone.from_iso8601(date) assert tstz == TimestampWithTimezone( - timestamp=Timestamp(seconds=1582810759, microseconds=0,), - offset=0, - negative_utc=True, + timestamp=Timestamp(seconds=1582810759, microseconds=0,), offset_bytes=b"-0000" )