# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import base64
import datetime
from json import JSONDecoder, JSONEncoder

import dateutil.parser


class SWHJSONEncoder(JSONEncoder):
    """JSON encoder aware of the extra types used by Software Heritage.

    On top of what the stock Python JSON encoder handles, this encoder
    serializes:
    - bytes, as a Base85 string;
    - datetime.datetime, as an ISO8601 string.

    Each of those values is wrapped in a dictionary with two keys:
    - swhtype, set to 'bytes' or 'datetime';
    - d, holding the encoded value.

    Any other object the stock encoder rejects is serialized as a list
    if it can be iterated over (which makes generators serializable).

    Caveats: the JSONEncoder extension mechanism gives us no way to
    "escape" regular dictionaries that happen to contain only the
    swhtype and d keys, so arbitrary data structures can't be
    round-tripped through SWHJSONEncoder and SWHJSONDecoder.

    """

    def default(self, o):
        """Return a JSON-serializable stand-in for o.

        Raises the TypeError of the stock encoder when o is neither a
        supported type nor an iterable.

        """
        if isinstance(o, bytes):
            return {
                'swhtype': 'bytes',
                'd': base64.b85encode(o).decode('ascii'),
            }
        if isinstance(o, datetime.datetime):
            return {
                'swhtype': 'datetime',
                'd': o.isoformat(),
            }

        try:
            return super().default(o)
        except TypeError as unsupported:
            # Last resort: anything iterable is emitted as a list.
            try:
                items = iter(o)
            except TypeError:
                # Not iterable either: report the stock encoder's
                # error, without the noise of the failed iter() call.
                raise unsupported from None
            return list(items)


class SWHJSONDecoder(JSONDecoder):
    """JSON decoder for documents produced by SWHJSONEncoder.

    On top of what the stock Python JSON decoder handles, this decoder
    restores:
    - bytes (encoded as a Base85 string);
    - datetime.datetime (encoded as an ISO8601 string).

    Those values must be encoded as a dictionary with exactly two
    keys:
    - swhtype, set to 'bytes' or 'datetime';
    - d, holding the encoded value.

    To limit the impact of our encoding, a dictionary whose swhtype
    key doesn't hold a known value is decoded as-is.

    """

    def decode_data(self, o):
        """Recursively restore the encoded values found inside o."""
        if isinstance(o, list):
            return [self.decode_data(item) for item in o]
        if not isinstance(o, dict):
            return o

        if {'d', 'swhtype'} == set(o.keys()):
            kind = o['swhtype']
            if kind == 'bytes':
                return base64.b85decode(o['d'])
            if kind == 'datetime':
                return dateutil.parser.parse(o['d'])
            # Unknown swhtype: treat it like any other dictionary.

        return {name: self.decode_data(item) for name, item in o.items()}

    def raw_decode(self, s, idx=0):
        """Decode like JSONDecoder.raw_decode, then restore encoded values."""
        parsed, end = super().raw_decode(s, idx)
        return self.decode_data(parsed), end