Page MenuHomeSoftware Heritage

D5008.id18597.diff
No OneTemporary

D5008.id18597.diff

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -759,9 +759,9 @@
$ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
origins and 'emd' for raw extrinsic metadata)
- $Timestamp is a decimal representation of the integer number of seconds since
- the UNIX epoch (1970-01-01 00:00:00 UTC), with no leading '0'
- (unless the timestamp value is zero) and no timezone.
+ $Timestamp is a decimal representation of the rounded-down integer number of
+ seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
+ with no leading '0' (unless the timestamp value is zero) and no timezone.
It may be negative by prefixing it with a '-', which must not be followed
by a '0'.
@@ -772,7 +772,19 @@
str: the intrinsic identifier for `metadata`
"""
- timestamp = metadata["discovery_date"].timestamp()
+ # Equivalent to math.floor(dt.timestamp()), i.e. rounding down.
+ # int(dt.timestamp()) would instead round toward zero, which would map
+ # two distinct seconds (the ones starting at -1 and at 0) onto timestamp 0.
+ #
+ # This should never be an issue in practice as Software Heritage didn't
+ # start collecting metadata before 2015.
+ timestamp = (
+ metadata["discovery_date"]
+ .astimezone(datetime.timezone.utc)
+ .replace(microsecond=0)
+ .timestamp()
+ )
+ assert timestamp.is_integer()
headers = [
(b"target", str(metadata["target"]).encode()),
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -920,6 +920,26 @@
"5c13f20ba336e44549baf3d7b9305b027ec9f43d",
)
+ def test_noninteger_timezone(self):
+ """Checks the discovery_date is translated to UTC before truncating
+ microseconds"""
+ tz = datetime.timezone(datetime.timedelta(microseconds=-42))
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 2021, 1, 25, 11, 27, 50, 1_000_000 - 42, tzinfo=tz,
+ ),
+ }
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(self.minimal),
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ )
+
def test_negative_timestamp(self):
metadata = {
**self.minimal,
@@ -948,6 +968,62 @@
"895d0821a2991dd376ddc303424aceb7c68280f9",
)
+ def test_epoch(self):
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ manifest = (
+ b"raw_extrinsic_metadata 201\0"
+ b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
+ b"discovery_date 0\n"
+ b"authority forge https://forge.softwareheritage.org/\n"
+ b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
+ b"format json\n"
+ b"\n"
+ b'{"foo": "bar"}'
+ )
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ hashlib.sha1(manifest).hexdigest(),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "27a53df54ace35ebd910493cdc70b334d6b7cb88",
+ )
+
+ def test_negative_epoch(self):
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 1969, 12, 31, 23, 59, 59, 1, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ manifest = (
+ b"raw_extrinsic_metadata 202\0"
+ b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
+ b"discovery_date -1\n"
+ b"authority forge https://forge.softwareheritage.org/\n"
+ b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
+ b"format json\n"
+ b"\n"
+ b'{"foo": "bar"}'
+ )
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ hashlib.sha1(manifest).hexdigest(),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "be7154a8fd49d87f81547ea634d1e2152907d089",
+ )
+
origin_example = {
"url": "https://github.com/torvalds/linux",

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:22 PM (1 w, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224631

Event Timeline