diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -885,6 +885,33 @@
         return {"name": self.name, "version": self.version}
 
 
+def normalize_discovery_date(value: Any) -> datetime.datetime:
+    """Normalize a discovery date to UTC with whole-second precision.
+
+    Args:
+        value: the candidate discovery date.
+
+    Returns:
+        ``value`` converted to UTC, with its microseconds set to 0.
+
+    Raises:
+        TypeError: if ``value`` is not a :class:`datetime.datetime`.
+        ValueError: if ``value`` is naive, i.e. it has no ``tzinfo`` or its
+            ``tzinfo`` yields no UTC offset.
+    """
+    if not isinstance(value, datetime.datetime):
+        raise TypeError("discovery_date must be a timezone-aware datetime.")
+
+    # A datetime is only aware when its tzinfo actually yields an offset;
+    # astimezone() would silently treat anything else as system local time.
+    if value.tzinfo is None or value.utcoffset() is None:
+        raise ValueError("discovery_date must be a timezone-aware datetime.")
+
+    # Convert to UTC *before* truncating, so that offsets with a sub-second
+    # component cannot leave stray microseconds in the result.
+    return value.astimezone(datetime.timezone.utc).replace(microsecond=0)
+
+
 @attr.s(frozen=True, slots=True)
 class RawExtrinsicMetadata(HashableObject, BaseModel):
     object_type: Final = "raw_extrinsic_metadata"
@@ -893,7 +920,7 @@
     target = attr.ib(type=ExtendedSWHID, validator=type_validator())
 
     # source
-    discovery_date = attr.ib(type=datetime.datetime, validator=type_validator())
+    discovery_date = attr.ib(type=datetime.datetime, converter=normalize_discovery_date)
     authority = attr.ib(type=MetadataAuthority, validator=type_validator())
     fetcher = attr.ib(type=MetadataFetcher, validator=type_validator())
 
@@ -923,12 +950,6 @@
     def compute_hash(self) -> bytes:
         return hash_to_bytes(raw_extrinsic_metadata_identifier(self.to_dict()))
 
-    @discovery_date.validator
-    def check_discovery_date(self, attribute, value):
-        """Checks the discovery_date has a timezone."""
-        if value is not None and value.tzinfo is None:
-            raise ValueError("discovery_date must be a timezone-aware datetime.")
-
     @origin.validator
     def check_origin(self, attribute, value):
         if value is None:
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -1140,3 +1140,53 @@
             ),
             **_common_metadata_fields,
         )
+
+
+def test_metadata_normalize_discovery_date():
+    fields_copy = {**_common_metadata_fields}
+    truncated_date = fields_copy.pop("discovery_date")
+    assert truncated_date.microsecond == 0
+
+    # Non-datetime values must raise TypeError: the converter replaces
+    # attrs_strict's type_validator for this attribute
+    with pytest.raises(TypeError):
+        RawExtrinsicMetadata(
+            target=_content_swhid, discovery_date="not a datetime", **fields_copy
+        )
+
+    # A tzinfo that yields no UTC offset still leaves the datetime naive
+    class _NoOffset(datetime.tzinfo):
+        def utcoffset(self, dt):
+            return None
+
+    with pytest.raises(ValueError):
+        RawExtrinsicMetadata(
+            target=_content_swhid,
+            discovery_date=truncated_date.replace(tzinfo=_NoOffset()),
+            **fields_copy,
+        )
+
+    # Check for truncation to integral second
+    date_with_us = truncated_date.replace(microsecond=42)
+    md = RawExtrinsicMetadata(
+        target=_content_swhid, discovery_date=date_with_us, **fields_copy,
+    )
+
+    assert md.discovery_date == truncated_date
+    assert md.discovery_date.tzinfo == datetime.timezone.utc
+
+    # Check that the timezone gets normalized. Timezones can be offset by a
+    # non-integral number of seconds, so use such an offset here: truncating
+    # before converting to UTC would leave stray microseconds behind.
+    timezone = datetime.timezone(offset=datetime.timedelta(hours=2, microseconds=500))
+    date_with_tz = truncated_date.astimezone(timezone)
+
+    assert date_with_tz.tzinfo != datetime.timezone.utc
+    assert date_with_tz.microsecond != 0
+
+    md = RawExtrinsicMetadata(
+        target=_content_swhid, discovery_date=date_with_tz, **fields_copy,
+    )
+
+    assert md.discovery_date == truncated_date
+    assert md.discovery_date.tzinfo == datetime.timezone.utc