diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -712,9 +712,9 @@ $Swhid are core SWHIDs, as defined in :ref:`persistent-identifiers`. - $Timestamp is a decimal representation of the integer number of seconds since - the UNIX epoch (1970-01-01 00:00:00 UTC), with no leading '0' - (unless the timestamp value is zero) and no timezone. + $Timestamp is a decimal representation of the rounded-down integer number of + seconds since the UNIX epoch (1970-01-01 00:00:00 UTC), + with no leading '0' (unless the timestamp value is zero) and no timezone. It may be negative by prefixing it with a '-', which must not be followed by a '0'. @@ -725,7 +725,14 @@ str: the intrinsic identifier for `metadata` """ - timestamp = metadata["discovery_date"].timestamp() + # equivalent to using math.floor(dt.timestamp()) to round down, + # as int(dt.timestamp()) rounds toward zero, + # which would map two distinct seconds onto the 0 timestamp. + # + # This should never be an issue in practice as Software Heritage didn't + # start collecting metadata before 2015. 
+ timestamp = metadata["discovery_date"].replace(microsecond=0).timestamp() + assert timestamp.is_integer() headers = [ (b"target_type", metadata["type"].encode("ascii")), diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -948,6 +948,64 @@ "02e58aa3e7d476f6fc174669f9f4b88d56f534fa", ) + def test_epoch(self): + metadata = { + **self.minimal, + "discovery_date": datetime.datetime( + 1970, 1, 1, 0, 00, 0, tzinfo=datetime.timezone.utc, + ), + } + + manifest = ( + b"raw_extrinsic_metadata 221\0" + b"target_type content\n" + b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n" + b"discovery_date 0\n" + b"authority forge https://forge.softwareheritage.org/\n" + b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n" + b"format json\n" + b"\n" + b'{"foo": "bar"}' + ) + + self.assertEqual( + identifiers.raw_extrinsic_metadata_identifier(metadata), + hashlib.sha1(manifest).hexdigest(), + ) + self.assertEqual( + identifiers.raw_extrinsic_metadata_identifier(metadata), + "6d97665ce05e97d67dd1be93a105b0a5006bb7ca", + ) + + def test_negative_epoch(self): + metadata = { + **self.minimal, + "discovery_date": datetime.datetime( + 1969, 12, 31, 23, 59, 59, 1, tzinfo=datetime.timezone.utc, + ), + } + + manifest = ( + b"raw_extrinsic_metadata 222\0" + b"target_type content\n" + b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n" + b"discovery_date -1\n" + b"authority forge https://forge.softwareheritage.org/\n" + b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n" + b"format json\n" + b"\n" + b'{"foo": "bar"}' + ) + + self.assertEqual( + identifiers.raw_extrinsic_metadata_identifier(metadata), + hashlib.sha1(manifest).hexdigest(), + ) + self.assertEqual( + identifiers.raw_extrinsic_metadata_identifier(metadata), + "db6d842c2e9230d7cf33d7fe36d8801377700e60", + ) + class OriginIdentifier(unittest.TestCase): def setUp(self):