diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -688,7 +688,7 @@ ``` target_type: $ValueOfMetadataTargetType target: $UrlOrSwhid - discovery_date: $ISO8601 + discovery_date: $Timestamp authority: $StrWithoutSpaces $IRI fetcher: $Str $Version format: $StrWithoutSpaces @@ -712,6 +712,12 @@ $Swhid are core SWHIDs, as defined in :ref:`persistent-identifiers`. + $Timestamp is a decimal representation of the integer number of seconds since + the UNIX epoch (1970-01-01 00:00:00 UTC), with no leading '0' + (unless the timestamp value is zero) and no timezone. + It may be negative by prefixing it with a '-', which must not be followed + by a '0'. + Newlines in $Bytes, $Str, and $Iri are escaped as with other git fields, ie. by adding a space after them. @@ -719,10 +725,16 @@ str: the intrinsic identifier for `metadata` """ + discovery_date = metadata["discovery_date"] + if discovery_date.microsecond != 0: + raise ValueError(f"discovery_date={discovery_date} has microsecond != 0") + timestamp = discovery_date.timestamp() + assert timestamp.is_integer() + headers = [ (b"target_type", metadata["type"].encode("ascii")), (b"target", str(metadata["target"]).encode()), - (b"discovery_date", metadata["discovery_date"].isoformat().encode("ascii")), + (b"discovery_date", str(int(timestamp)).encode("ascii")), ( b"authority", f"{metadata['authority']['type']} {metadata['authority']['url']}".encode(), diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -803,10 +803,10 @@ def test_minimal(self): manifest = ( - b"raw_extrinsic_metadata 245\0" + b"raw_extrinsic_metadata 230\0" b"target_type content\n" b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n" - b"discovery_date 2021-01-25T11:27:51+00:00\n" + b"discovery_date 1611574071\n" b"authority forge https://forge.softwareheritage.org/\n" b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n" b"format json\n" @@ -820,15 +820,15 @@ ) self.assertEqual( identifiers.raw_extrinsic_metadata_identifier(self.minimal), - "da734f1531f830b7282ee01c5e0c0dfe7ecc99e9", + "e35827936a4ae7c351a92eda0eeb36da07da315f", ) def test_maximal(self): manifest = ( - b"raw_extrinsic_metadata 568\0" + b"raw_extrinsic_metadata 553\0" b"target_type content\n" b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n" - b"discovery_date 2021-01-25T11:27:51+00:00\n" + b"discovery_date 1611574071\n" b"authority forge https://forge.softwareheritage.org/\n" b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n" b"format json\n" @@ -849,7 +849,7 @@ ) self.assertEqual( identifiers.raw_extrinsic_metadata_identifier(self.maximal), - "0d9e3bb9a72850e32bfb575f612cfad1a7e6b66a", + "7523fa6cef72ced2935242a45def7c5a36d0f609", ) def test_nonascii_path(self): @@ -858,10 +858,10 @@ "path": b"/ab\nc/d\xf0\x9f\xa4\xb7e\x00f", } manifest = ( - b"raw_extrinsic_metadata 266\0" + b"raw_extrinsic_metadata 251\0" b"target_type content\n" b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n" - b"discovery_date 2021-01-25T11:27:51+00:00\n" + b"discovery_date 1611574071\n" b"authority forge https://forge.softwareheritage.org/\n" b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n" b"format json\n" @@ -877,7 +877,27 @@ ) self.assertEqual( identifiers.raw_extrinsic_metadata_identifier(metadata), - "63f4cb28396e00926ab7ebfd96e96b60227fc11a", + "caacc32905ebf4659a633a2dc856ccd6412991dc", + ) + + def test_timezone_insensitive(self): + """Checks the timezone of the datetime.datetime does not affect the + hashed manifest.""" + utc_plus_one = datetime.timezone(datetime.timedelta(hours=1)) + metadata = { + **self.minimal, + "discovery_date": datetime.datetime( + 2021, 1, 25, 12, 27, 51, tzinfo=utc_plus_one, + ), + } + + self.assertEqual( + identifiers.raw_extrinsic_metadata_identifier(self.minimal), + identifiers.raw_extrinsic_metadata_identifier(metadata), + ) + self.assertEqual( + identifiers.raw_extrinsic_metadata_identifier(metadata), + "e35827936a4ae7c351a92eda0eeb36da07da315f", )