diff --git a/mypy.ini b/mypy.ini
index 1688682..5d30edb 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,42 +1,45 @@
 [mypy]
 namespace_packages = True
 warn_unused_ignores = True
 
 
 # 3rd party libraries without stubs (yet)
 
 [mypy-aiohttp_utils.*]
 ignore_missing_imports = True
 
 [mypy-arrow.*]
 ignore_missing_imports = True
 
 [mypy-celery.*]
 ignore_missing_imports = True
 
 [mypy-decorator.*]
 ignore_missing_imports = True
 
 [mypy-deprecated.*]
 ignore_missing_imports = True
 
 [mypy-django.*]  # false positive, only used by hypothesis' extras
 ignore_missing_imports = True
 
+[mypy-iso8601.*]
+ignore_missing_imports = True
+
 [mypy-msgpack.*]
 ignore_missing_imports = True
 
 [mypy-pkg_resources.*]
 ignore_missing_imports = True
 
 [mypy-psycopg2.*]
 ignore_missing_imports = True
 
 [mypy-pytest.*]
 ignore_missing_imports = True
 
 [mypy-requests_mock.*]
 ignore_missing_imports = True
 
 [mypy-systemd.*]
 ignore_missing_imports = True
diff --git a/requirements-http.txt b/requirements-http.txt
index c66192b..f307d47 100644
--- a/requirements-http.txt
+++ b/requirements-http.txt
@@ -1,10 +1,10 @@
 # requirements for swh.core.api
 aiohttp
 aiohttp_utils >= 3.1.1
 arrow
 decorator
 Flask
+iso8601
 msgpack > 0.5
-python-dateutil
 requests
 blinker  # dependency of sentry-sdk[flask]
diff --git a/swh/core/api/serializers.py b/swh/core/api/serializers.py
index c16108e..57f37ae 100644
--- a/swh/core/api/serializers.py
+++ b/swh/core/api/serializers.py
@@ -1,214 +1,214 @@
 # Copyright (C) 2015-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import base64
 import datetime
 import json
 import types
 
 from uuid import UUID
 
 import arrow
-import dateutil.parser
+import iso8601
 import msgpack
 
 from typing import Any, Dict, Union, Tuple
 
 from requests import Response
 
 
 ENCODERS = [
     (arrow.Arrow, 'arrow', arrow.Arrow.isoformat),
     (datetime.datetime, 'datetime', datetime.datetime.isoformat),
     (datetime.timedelta, 'timedelta', lambda o: {
         'days': o.days,
         'seconds': o.seconds,
         'microseconds': o.microseconds,
     }),
     (UUID, 'uuid', str),
 
     # Only for JSON:
     (bytes, 'bytes', lambda o: base64.b85encode(o).decode('ascii')),
 ]
 
 DECODERS = {
     'arrow': arrow.get,
-    'datetime': dateutil.parser.parse,
+    'datetime': lambda d: iso8601.parse_date(d, default_timezone=None),
     'timedelta': lambda d: datetime.timedelta(**d),
     'uuid': UUID,
 
     # Only for JSON:
     'bytes': base64.b85decode,
 }
 
 
 def encode_data_client(data: Any, extra_encoders=None) -> bytes:
     try:
         return msgpack_dumps(data, extra_encoders=extra_encoders)
     except OverflowError as e:
         raise ValueError('Limits were reached. Please, check your input.\n' +
                          str(e))
 
 
 def decode_response(response: Response, extra_decoders=None) -> Any:
     content_type = response.headers['content-type']
 
     if content_type.startswith('application/x-msgpack'):
         r = msgpack_loads(response.content, extra_decoders=extra_decoders)
     elif content_type.startswith('application/json'):
         r = json_loads(response.text, extra_decoders=extra_decoders)
     elif content_type.startswith('text/'):
         r = response.text
     else:
         raise ValueError('Wrong content type `%s` for API response'
                          % content_type)
 
     return r
 
 
 class SWHJSONEncoder(json.JSONEncoder):
     """JSON encoder for data structures generated by Software Heritage.
 
     This JSON encoder extends the default Python JSON encoder and adds
     awareness for the following specific types:
 
     - bytes (get encoded as a Base85 string);
     - datetime.datetime (get encoded as an ISO8601 string).
 
     Non-standard types get encoded as a dictionary with two keys:
 
     - swhtype with value 'bytes' or 'datetime';
     - d containing the encoded value.
 
     SWHJSONEncoder also encodes arbitrary iterables as a list
     (allowing serialization of generators).
 
     Caveats: Limitations in the JSONEncoder extension mechanism
     prevent us from "escaping" dictionaries that only contain the
     swhtype and d keys, and therefore arbitrary data structures can't
     be round-tripped through SWHJSONEncoder and SWHJSONDecoder.
 
     """
 
     def __init__(self, extra_encoders=None, **kwargs):
         super().__init__(**kwargs)
         self.encoders = ENCODERS
         if extra_encoders:
             self.encoders += extra_encoders
 
     def default(self, o: Any
                 ) -> Union[Dict[str, Union[Dict[str, int], str]], list]:
         for (type_, type_name, encoder) in self.encoders:
             if isinstance(o, type_):
                 return {
                     'swhtype': type_name,
                     'd': encoder(o),
                 }
         try:
             return super().default(o)
         except TypeError as e:
             try:
                 iterable = iter(o)
             except TypeError:
                 raise e from None
             else:
                 return list(iterable)
 
 
 class SWHJSONDecoder(json.JSONDecoder):
     """JSON decoder for data structures encoded with SWHJSONEncoder.
 
     This JSON decoder extends the default Python JSON decoder,
     allowing the decoding of:
 
     - bytes (encoded as a Base85 string);
     - datetime.datetime (encoded as an ISO8601 string).
 
     Non-standard types must be encoded as a dictionary with exactly
     two keys:
 
     - swhtype with value 'bytes' or 'datetime';
     - d containing the encoded value.
 
     To limit the impact of our encoding, if the swhtype key doesn't
     contain a known value, the dictionary is decoded as-is.
 
     """
 
     def __init__(self, extra_decoders=None, **kwargs):
         super().__init__(**kwargs)
         self.decoders = DECODERS
         if extra_decoders:
             self.decoders = {**self.decoders, **extra_decoders}
 
     def decode_data(self, o: Any) -> Any:
         if isinstance(o, dict):
             if set(o.keys()) == {'d', 'swhtype'}:
                 if o['swhtype'] == 'bytes':
                     return base64.b85decode(o['d'])
                 decoder = self.decoders.get(o['swhtype'])
                 if decoder:
                     return decoder(self.decode_data(o['d']))
             return {key: self.decode_data(value) for key, value in o.items()}
         if isinstance(o, list):
             return [self.decode_data(value) for value in o]
         else:
             return o
 
     def raw_decode(self, s: str, idx: int = 0) -> Tuple[Any, int]:
         data, index = super().raw_decode(s, idx)
         return self.decode_data(data), index
 
 
 def json_dumps(data: Any, extra_encoders=None) -> str:
     return json.dumps(data, cls=SWHJSONEncoder, extra_encoders=extra_encoders)
 
 
 def json_loads(data: str, extra_decoders=None) -> Any:
     return json.loads(data, cls=SWHJSONDecoder, extra_decoders=extra_decoders)
 
 
 def msgpack_dumps(data: Any, extra_encoders=None) -> bytes:
     """Write data as a msgpack stream"""
     encoders = ENCODERS
     if extra_encoders:
         encoders += extra_encoders
 
     def encode_types(obj):
         if isinstance(obj, types.GeneratorType):
             return list(obj)
 
         for (type_, type_name, encoder) in encoders:
             if isinstance(obj, type_):
                 return {
                     b'swhtype': type_name,
                     b'd': encoder(obj),
                 }
         return obj
 
     return msgpack.packb(data, use_bin_type=True, default=encode_types)
 
 
 def msgpack_loads(data: bytes, extra_decoders=None) -> Any:
     """Read data as a msgpack stream"""
     decoders = DECODERS
     if extra_decoders:
         decoders = {**decoders, **extra_decoders}
 
     def decode_types(obj):
         if set(obj.keys()) == {b'd', b'swhtype'}:
             decoder = decoders.get(obj[b'swhtype'])
             if decoder:
                 return decoder(obj[b'd'])
         return obj
 
     try:
         return msgpack.unpackb(data, raw=False, object_hook=decode_types)
     except TypeError:  # msgpack < 0.5.2
         return msgpack.unpackb(data, encoding='utf-8',
                                object_hook=decode_types)
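
Not part of the patch: a minimal round-trip sketch of the behavior the serializers.py hunk depends on. It uses only the patched module's own json_dumps/json_loads plus the standard library. The point of passing default_timezone=None is that iso8601.parse_date would otherwise attach UTC to naive timestamps, whereas dateutil.parser.parse (removed by this patch) preserved tz-awareness as-is; the decoder change is meant to keep that property.

import datetime

from swh.core.api.serializers import json_dumps, json_loads

aware = datetime.datetime(2019, 1, 1, 12, 0, tzinfo=datetime.timezone.utc)
naive = datetime.datetime(2019, 1, 1, 12, 0)  # no tzinfo attached

for dt in (aware, naive):
    # datetimes are encoded via datetime.isoformat() and decoded with
    # iso8601.parse_date(d, default_timezone=None), per the DECODERS change
    decoded = json_loads(json_dumps(dt))
    assert decoded == dt
    # tz-awareness must survive the round trip:
    # naive stays naive, aware stays aware
    assert (decoded.tzinfo is None) == (dt.tzinfo is None)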