Page MenuHomeSoftware Heritage

D5004.id18554.diff
No OneTemporary

D5004.id18554.diff

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -759,6 +759,12 @@
$ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
origins and 'emd' for raw extrinsic metadata)
+ $Timestamp is a decimal representation of the integer number of seconds since
+ the UNIX epoch (1970-01-01 00:00:00 UTC), with no leading '0'
+ (unless the timestamp value is zero) and no timezone.
+ A negative value is written with a leading '-', which must not be followed
+ by a '0'.
+
Newlines in $Bytes, $Str, and $Iri are escaped as with other git fields,
ie. by adding a space after them.
@@ -766,9 +772,11 @@
str: the intrinsic identifier for `metadata`
"""
+ timestamp = metadata["discovery_date"].timestamp()
+
headers = [
(b"target", str(metadata["target"]).encode()),
- (b"discovery_date", metadata["discovery_date"].isoformat().encode("ascii")),
+ (b"discovery_date", str(int(timestamp)).encode("ascii")),
(
b"authority",
f"{metadata['authority']['type']} {metadata['authority']['url']}".encode(),
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -807,9 +807,9 @@
def test_minimal(self):
manifest = (
- b"raw_extrinsic_metadata 225\0"
+ b"raw_extrinsic_metadata 210\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
- b"discovery_date 2021-01-25T11:27:51+00:00\n"
+ b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
@@ -823,14 +823,14 @@
)
self.assertEqual(
identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- "df16b5ea35b12f530fb7ecd0eb10b87a8b1fc3d2",
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
)
def test_maximal(self):
manifest = (
- b"raw_extrinsic_metadata 548\0"
+ b"raw_extrinsic_metadata 533\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
- b"discovery_date 2021-01-25T11:27:51+00:00\n"
+ b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
@@ -851,7 +851,7 @@
)
self.assertEqual(
identifiers.raw_extrinsic_metadata_identifier(self.maximal),
- "55563d91a3f9cb41aa36c60c2b518433bf318ae4",
+ "f96966e1093d15236a31fde07e47d5b1c9428049",
)
def test_nonascii_path(self):
@@ -860,9 +860,9 @@
"path": b"/ab\nc/d\xf0\x9f\xa4\xb7e\x00f",
}
manifest = (
- b"raw_extrinsic_metadata 246\0"
+ b"raw_extrinsic_metadata 231\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
- b"discovery_date 2021-01-25T11:27:51+00:00\n"
+ b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
@@ -878,7 +878,74 @@
)
self.assertEqual(
identifiers.raw_extrinsic_metadata_identifier(metadata),
- "d8e5856601cdae96dfdfb5147235895949c9322d",
+ "7cc83fd1912176510c083f5df43f01b09af4b333",
+ )
+
+ def test_timezone_insensitive(self):
+ """Checks the timezone of the datetime.datetime does not affect the
+ hashed manifest."""
+ utc_plus_one = datetime.timezone(datetime.timedelta(hours=1))
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 2021, 1, 25, 12, 27, 51, tzinfo=utc_plus_one,
+ ),
+ }
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(self.minimal),
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ )
+
+ def test_microsecond_insensitive(self):
+ """Checks the microseconds of the datetime.datetime do not affect the
+ hashed manifest."""
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 2021, 1, 25, 11, 27, 51, 123456, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(self.minimal),
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ )
+
+ def test_negative_timestamp(self):
+ metadata = {
+ **self.minimal,
+ "discovery_date": datetime.datetime(
+ 1960, 1, 25, 11, 27, 51, tzinfo=datetime.timezone.utc,
+ ),
+ }
+
+ manifest = (
+ b"raw_extrinsic_metadata 210\0"
+ b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
+ b"discovery_date -313504329\n"
+ b"authority forge https://forge.softwareheritage.org/\n"
+ b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
+ b"format json\n"
+ b"\n"
+ b'{"foo": "bar"}'
+ )
+
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ hashlib.sha1(manifest).hexdigest(),
+ )
+ self.assertEqual(
+ identifiers.raw_extrinsic_metadata_identifier(metadata),
+ "895d0821a2991dd376ddc303424aceb7c68280f9",
)
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -855,7 +855,7 @@
m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
"target": str(_origin_swhid),
- "id": b"\xeck\x9cQ\xf1\x1f\xeb\xde\x85{\x7f\xf0\x83\x9c\x8a\xd5\xfb\x8e2\xef",
+ "id": b"@j\xc9\x01\xbc\x1e#p*\xf3q9\xa7u\x97\x00\x14\x02xa",
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
@@ -863,7 +863,7 @@
m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- "id": b"\x8e:_;eb\xe6\xf1Y\xd9\xa5aG[\rt\x89\xa1\x0b\xe4",
+ "id": b"\xbc\xa3U\xddf\x19U\xc5\xd2\xd7\xdfK\xd7c\x1f\xa8\xfeh\x992",
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
@@ -882,7 +882,7 @@
)
assert m.to_dict() == {
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- "id": b"\x07\xf1aS\xbe\xda\xd2\xf2\xd7\xaf:\xc7\xb7\x91C\x87W\x85R\x19",
+ "id": b"\x14l\xb0\x1f\xb9\xc0{)\xc7\x0f\xbd\xc0*,YZ\xf5C\xab\xfc",
**common_fields,
"origin": "https://example.org/",
"snapshot": f"swh:1:snp:{hash_hex}",

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:22 PM (1 w, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219173

Event Timeline