Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345499
D5004.id18554.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D5004.id18554.diff
View Options
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -759,6 +759,12 @@
$ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
origins and 'emd' for raw extrinsic metadata)
+ $Timestamp is a decimal representation of the integer number of seconds since
+ the UNIX epoch (1970-01-01 00:00:00 UTC), with no leading '0'
+ (unless the timestamp value is zero) and no timezone.
+ A negative value is written with a leading '-', which must not be followed
+ by a '0'.
+
Newlines in $Bytes, $Str, and $Iri are escaped as with other git fields,
ie. by adding a space after them.
@@ -766,9 +772,11 @@
str: the intrinsic identifier for `metadata`
"""
+ timestamp = metadata["discovery_date"].timestamp()
+
headers = [
(b"target", str(metadata["target"]).encode()),
- (b"discovery_date", metadata["discovery_date"].isoformat().encode("ascii")),
+ (b"discovery_date", str(int(timestamp)).encode("ascii")),
(
b"authority",
f"{metadata['authority']['type']} {metadata['authority']['url']}".encode(),
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -807,9 +807,9 @@
def test_minimal(self):
manifest = (
- b"raw_extrinsic_metadata 225\0"
+ b"raw_extrinsic_metadata 210\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
- b"discovery_date 2021-01-25T11:27:51+00:00\n"
+ b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
@@ -823,14 +823,14 @@
)
self.assertEqual(
identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- "df16b5ea35b12f530fb7ecd0eb10b87a8b1fc3d2",
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
)
def test_maximal(self):
manifest = (
- b"raw_extrinsic_metadata 548\0"
+ b"raw_extrinsic_metadata 533\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
- b"discovery_date 2021-01-25T11:27:51+00:00\n"
+ b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
@@ -851,7 +851,7 @@
)
self.assertEqual(
identifiers.raw_extrinsic_metadata_identifier(self.maximal),
- "55563d91a3f9cb41aa36c60c2b518433bf318ae4",
+ "f96966e1093d15236a31fde07e47d5b1c9428049",
)
def test_nonascii_path(self):
@@ -860,9 +860,9 @@
"path": b"/ab\nc/d\xf0\x9f\xa4\xb7e\x00f",
}
manifest = (
- b"raw_extrinsic_metadata 246\0"
+ b"raw_extrinsic_metadata 231\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
- b"discovery_date 2021-01-25T11:27:51+00:00\n"
+ b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
@@ -878,7 +878,74 @@
)
self.assertEqual(
identifiers.raw_extrinsic_metadata_identifier(metadata),
- "d8e5856601cdae96dfdfb5147235895949c9322d",
+ "7cc83fd1912176510c083f5df43f01b09af4b333",
+ )
+
+ def test_timezone_insensitive(self):
+ """Checks that the timezone of the datetime.datetime does not affect the
+ hashed manifest."""
+ utc_plus_one = datetime.timezone(datetime.timedelta(hours=1))
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 2021, 1, 25, 12, 27, 51, tzinfo=utc_plus_one,
+ ),
+ }
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(self.minimal),
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ )
+
+ def test_microsecond_insensitive(self):
+ """Checks that the microseconds of the datetime.datetime do not affect the
+ hashed manifest."""
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 2021, 1, 25, 11, 27, 51, 123456, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(self.minimal),
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ )
+
+ def test_negative_timestamp(self):
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 1960, 1, 25, 11, 27, 51, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ manifest = (
+ b"raw_extrinsic_metadata 210\0"
+ b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
+ b"discovery_date -313504329\n"
+ b"authority forge https://forge.softwareheritage.org/\n"
+ b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
+ b"format json\n"
+ b"\n"
+ b'{"foo": "bar"}'
+ )
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ hashlib.sha1(manifest).hexdigest(),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "895d0821a2991dd376ddc303424aceb7c68280f9",
)
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -855,7 +855,7 @@
m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
"target": str(_origin_swhid),
- "id": b"\xeck\x9cQ\xf1\x1f\xeb\xde\x85{\x7f\xf0\x83\x9c\x8a\xd5\xfb\x8e2\xef",
+ "id": b"@j\xc9\x01\xbc\x1e#p*\xf3q9\xa7u\x97\x00\x14\x02xa",
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
@@ -863,7 +863,7 @@
m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- "id": b"\x8e:_;eb\xe6\xf1Y\xd9\xa5aG[\rt\x89\xa1\x0b\xe4",
+ "id": b"\xbc\xa3U\xddf\x19U\xc5\xd2\xd7\xdfK\xd7c\x1f\xa8\xfeh\x992",
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
@@ -882,7 +882,7 @@
)
assert m.to_dict() == {
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- "id": b"\x07\xf1aS\xbe\xda\xd2\xf2\xd7\xaf:\xc7\xb7\x91C\x87W\x85R\x19",
+ "id": b"\x14l\xb0\x1f\xb9\xc0{)\xc7\x0f\xbd\xc0*,YZ\xf5C\xab\xfc",
**common_fields,
"origin": "https://example.org/",
"snapshot": f"swh:1:snp:{hash_hex}",
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:22 PM (6 d, 18 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219173
Attached To
D5004: identifiers: Change the manifest format of raw_extrinsic_metadata to use integer instead of ISO8601
Event Timeline
Log In to Comment