Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345481
D5008.id18597.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D5008.id18597.diff
View Options
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -759,9 +759,9 @@
$ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
origins and 'emd' for raw extrinsic metadata)
- $Timestamp is a decimal representation of the integer number of seconds since
- the UNIX epoch (1970-01-01 00:00:00 UTC), with no leading '0'
- (unless the timestamp value is zero) and no timezone.
+ $Timestamp is a decimal representation of the rounded-down integer number of
+ seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
+ with no leading '0' (unless the timestamp value is zero) and no timezone.
It may be negative by prefixing it with a '-', which must not be followed
by a '0'.
@@ -772,7 +772,19 @@
str: the intrinsic identifier for `metadata`
"""
- timestamp = metadata["discovery_date"].timestamp()
+ # Truncating the microseconds (in UTC) is equivalent to using
+ # math.floor(dt.timestamp()), i.e. rounding down. int(dt.timestamp()) rounds
+ # toward zero instead, which would map both (-1, 0) and [0, 1) onto timestamp 0.
+ #
+ # This should never be an issue in practice as Software Heritage didn't
+ # start collecting metadata before 2015.
+ timestamp = (
+ metadata["discovery_date"]
+ .astimezone(datetime.timezone.utc)
+ .replace(microsecond=0)
+ .timestamp()
+ )
+ assert timestamp.is_integer()
headers = [
(b"target", str(metadata["target"]).encode()),
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -920,6 +920,26 @@
"5c13f20ba336e44549baf3d7b9305b027ec9f43d",
)
+ def test_noninteger_timezone(self):
+ """Checks that the discovery_date is converted to UTC before its
+ microseconds are truncated."""
+ tz = datetime.timezone(datetime.timedelta(microseconds=-42))
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 2021, 1, 25, 11, 27, 50, 1_000_000 - 42, tzinfo=tz,
+ ),
+ }
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(self.minimal),
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ )
+
def test_negative_timestamp(self):
metadata = {
**self.minimal,
@@ -948,6 +968,62 @@
"895d0821a2991dd376ddc303424aceb7c68280f9",
)
+ def test_epoch(self):
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ manifest = (
+ b"raw_extrinsic_metadata 201\0"
+ b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
+ b"discovery_date 0\n"
+ b"authority forge https://forge.softwareheritage.org/\n"
+ b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
+ b"format json\n"
+ b"\n"
+ b'{"foo": "bar"}'
+ )
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ hashlib.sha1(manifest).hexdigest(),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "27a53df54ace35ebd910493cdc70b334d6b7cb88",
+ )
+
+ def test_negative_epoch(self):
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 1969, 12, 31, 23, 59, 59, 1, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ manifest = (
+ b"raw_extrinsic_metadata 202\0"
+ b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
+ b"discovery_date -1\n"
+ b"authority forge https://forge.softwareheritage.org/\n"
+ b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
+ b"format json\n"
+ b"\n"
+ b'{"foo": "bar"}'
+ )
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ hashlib.sha1(manifest).hexdigest(),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "be7154a8fd49d87f81547ea634d1e2152907d089",
+ )
+
origin_example = {
"url": "https://github.com/torvalds/linux",
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:22 PM (1 w, 2 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224631
Attached To
D5008: identifiers: Properly define the behavior of raw_extrinsic_metadata on negative timestamps.
Event Timeline
Log In to Comment