diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -49,6 +49,8 @@
 
 SHA1_SIZE = 20
 
+_OFFSET_CHARS = frozenset(b"+-0123456789")
+
 # TODO: Limit this to 20 bytes
 Sha1Git = bytes
 Sha1 = bytes
@@ -325,6 +327,15 @@
     offset = attr.ib(type=int, validator=type_validator())
     negative_utc = attr.ib(type=bool, validator=type_validator())
 
+    offset_bytes = attr.ib(type=bytes, validator=type_validator())
+    """Raw git representation of the timezone, as an offset from UTC.
+    It should follow this format: ``+HHMM`` or ``-HHMM`` (including ``+0000`` and
+    ``-0000``).
+
+    However, when created from git objects, it must be the exact bytes used in the
+    original objects, so it may differ from this format when they do.
+    """
+
     @offset.validator
     def check_offset(self, attribute, value):
         """Checks the offset is a 16-bits signed integer (in theory, it
@@ -334,11 +345,64 @@
             # you'll find in the wild...
             raise ValueError("offset too large: %d minutes" % value)
 
+        self._check_offsets_match()
+
     @negative_utc.validator
     def check_negative_utc(self, attribute, value):
         if self.offset and value:
             raise ValueError("negative_utc can only be True is offset=0")
 
+        self._check_offsets_match()
+
+    @offset_bytes.default
+    def _default_offset_bytes(self):
+        """Computes offset_bytes from offset and negative_utc when not provided."""
+        negative = self.offset < 0 or self.negative_utc
+        (hours, minutes) = divmod(abs(self.offset), 60)
+        return f"{'-' if negative else '+'}{hours:02}{minutes:02}".encode()
+
+    @offset_bytes.validator
+    def check_offset_bytes(self, attribute, value):
+        """Checks offset_bytes only contains sign characters and digits."""
+        if not set(value) <= _OFFSET_CHARS:
+            raise ValueError(f"invalid characters in offset_bytes: {value!r}")
+
+        self._check_offsets_match()
+
+    def _check_offsets_match(self):
+        """Checks that offset_bytes agrees with offset and negative_utc.
+
+        Raises:
+          ValueError: if offset_bytes is malformed or inconsistent with them
+        """
+        offset_str = self.offset_bytes.decode()
+        # Not an assert: those are stripped by ``python -O``, and indexing an
+        # empty value would raise IndexError instead of ValueError.
+        if offset_str[:1] not in ("+", "-"):
+            raise ValueError(
+                f"offset_bytes ({self.offset_bytes!r}) must start with '+' or '-'"
+            )
+        sign = int(offset_str[0] + "1")
+        try:
+            hours = int(offset_str[1:-2])
+            minutes = int(offset_str[-2:])
+        except ValueError:
+            raise ValueError(
+                f"offset_bytes ({self.offset_bytes!r}) is not a valid offset"
+            ) from None
+        offset = sign * (hours * 60 + minutes)
+        if offset != self.offset:
+            raise ValueError(
+                f"offset_bytes ({self.offset_bytes!r}) does not match offset "
+                f"{divmod(self.offset, 60)}"
+            )
+
+        if offset == 0 and self.negative_utc != self.offset_bytes.startswith(b"-"):
+            raise ValueError(
+                f"offset_bytes ({self.offset_bytes!r}) does not match negative_utc "
+                f"({self.negative_utc})"
+            )
+
     @classmethod
     def from_dict(cls, time_representation: Union[Dict, datetime.datetime, int]):
         """Builds a TimestampWithTimezone from any of the formats
@@ -422,7 +486,8 @@
         dt = iso8601.parse_date(s)
         tstz = cls.from_datetime(dt)
         if dt.tzname() == "-00:00":
-            tstz = attr.evolve(tstz, negative_utc=True)
+            assert tstz.offset_bytes == b"+0000"
+            tstz = attr.evolve(tstz, negative_utc=True, offset_bytes=b"-0000")
         return tstz
 
 
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -1046,6 +1046,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
     (
@@ -1054,6 +1055,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
     (
@@ -1062,6 +1064,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
     (
@@ -1070,6 +1073,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
     (
@@ -1078,6 +1082,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
     (
@@ -1090,6 +1095,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
     (
@@ -1102,6 +1108,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 100},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
     (
@@ -1110,6 +1117,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": True,
+            "offset_bytes": b"-0000",
         },
     ),
     (
@@ -1118,6 +1126,7 @@
             "timestamp": {"seconds": 12345, "microseconds": 0},
             "offset": 0,
             "negative_utc": False,
+            "offset_bytes": b"+0000",
         },
     ),
 ]
@@ -1153,6 +1162,7 @@
     datetime.timezone.max,
 ]
 TS_TZ_EXPECTED = [-1439, -60, 0, 60, 1439]
+TS_TZ_BYTES_EXPECTED = [b"-2359", b"-0100", b"+0000", b"+0100", b"+2359"]
 TS_DATETIMES = [
     datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=UTC),
     datetime.datetime(2120, 12, 31, 23, 59, 59, tzinfo=UTC),
@@ -1162,14 +1172,19 @@
 
 
 @pytest.mark.parametrize("date, seconds", zip(TS_DATETIMES, TS_DT_EXPECTED))
-@pytest.mark.parametrize("tz, offset", zip(TS_TIMEZONES, TS_TZ_EXPECTED))
+@pytest.mark.parametrize(
+    "tz, offset, offset_bytes", zip(TS_TIMEZONES, TS_TZ_EXPECTED, TS_TZ_BYTES_EXPECTED)
+)
 @pytest.mark.parametrize("microsecond", [0, 1, 10, 100, 1000, 999999])
-def test_normalize_timestamp_datetime(date, seconds, tz, offset, microsecond):
+def test_normalize_timestamp_datetime(
+    date, seconds, tz, offset, offset_bytes, microsecond
+):
     date = date.astimezone(tz).replace(microsecond=microsecond)
     assert TimestampWithTimezone.from_dict(date).to_dict() == {
         "timestamp": {"seconds": seconds, "microseconds": microsecond},
         "offset": offset,
         "negative_utc": False,
+        "offset_bytes": offset_bytes,
     }
 
 
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -447,21 +447,27 @@
     tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=False)
     attr.validate(tstz)
     assert tstz.negative_utc is False
+    assert tstz.offset_bytes == b"+0000"
 
-    attr.validate(TimestampWithTimezone(timestamp=ts, offset=10, negative_utc=False))
+    tstz = TimestampWithTimezone(timestamp=ts, offset=10, negative_utc=False)
+    attr.validate(tstz)
+    assert tstz.offset_bytes == b"+0010"
 
-    attr.validate(TimestampWithTimezone(timestamp=ts, offset=-10, negative_utc=False))
+    tstz = TimestampWithTimezone(timestamp=ts, offset=-10, negative_utc=False)
+    attr.validate(tstz)
+    assert tstz.offset_bytes == b"-0010"
 
     tstz = TimestampWithTimezone(timestamp=ts, offset=0, negative_utc=True)
     attr.validate(tstz)
     assert tstz.negative_utc is True
+    assert tstz.offset_bytes == b"-0000"
 
     with pytest.raises(AttributeTypeError):
         TimestampWithTimezone(
             timestamp=datetime.datetime.now(), offset=0, negative_utc=False
         )
 
-    with pytest.raises(AttributeTypeError):
+    with pytest.raises((AttributeTypeError, TypeError)):
         TimestampWithTimezone(timestamp=ts, offset="0", negative_utc=False)
 
     with pytest.raises(AttributeTypeError):