diff --git a/PKG-INFO b/PKG-INFO index acfbd79..26a61e6 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,88 +1,88 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.73 +Version: 0.0.74 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Funding, https://www.softwareheritage.org/donate -Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Source, https://forge.softwareheritage.org/source/swh-core +Project-URL: Funding, https://www.softwareheritage.org/donate Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... 
[ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown +Provides-Extra: db Provides-Extra: http Provides-Extra: testing -Provides-Extra: db diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index acfbd79..26a61e6 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,88 +1,88 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.73 +Version: 0.0.74 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Funding, https://www.softwareheritage.org/donate -Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: 
Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Source, https://forge.softwareheritage.org/source/swh-core +Project-URL: Funding, https://www.softwareheritage.org/donate Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). 
Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown +Provides-Extra: db Provides-Extra: http Provides-Extra: testing -Provides-Extra: db diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt index 9f2e95d..37ae81f 100644 --- a/swh.core.egg-info/SOURCES.txt +++ b/swh.core.egg-info/SOURCES.txt @@ -1,61 +1,63 @@ MANIFEST.in Makefile README.md requirements-db.txt requirements-http.txt requirements-swh.txt requirements.txt setup.py version.txt swh/__init__.py swh.core.egg-info/PKG-INFO swh.core.egg-info/SOURCES.txt swh.core.egg-info/dependency_links.txt swh.core.egg-info/entry_points.txt swh.core.egg-info/requires.txt swh.core.egg-info/top_level.txt swh/core/__init__.py swh/core/api_async.py swh/core/config.py swh/core/logger.py swh/core/py.typed swh/core/pytest_plugin.py swh/core/statsd.py swh/core/tarball.py swh/core/utils.py swh/core/api/__init__.py swh/core/api/asynchronous.py swh/core/api/negotiation.py swh/core/api/serializers.py swh/core/api/tests/__init__.py swh/core/api/tests/server_testing.py -swh/core/api/tests/test_api.py swh/core/api/tests/test_async.py +swh/core/api/tests/test_rpc_client.py +swh/core/api/tests/test_rpc_client_server.py +swh/core/api/tests/test_rpc_server.py 
swh/core/api/tests/test_serializers.py swh/core/cli/__init__.py swh/core/cli/db.py swh/core/db/__init__.py swh/core/db/common.py swh/core/db/db_utils.py swh/core/db/tests/__init__.py swh/core/db/tests/conftest.py swh/core/db/tests/db_testing.py swh/core/db/tests/test_cli.py swh/core/db/tests/test_db.py swh/core/sql/log-schema.sql swh/core/tests/__init__.py swh/core/tests/test_cli.py swh/core/tests/test_config.py swh/core/tests/test_logger.py swh/core/tests/test_pytest_plugin.py swh/core/tests/test_statsd.py swh/core/tests/test_tarball.py swh/core/tests/test_utils.py swh/core/tests/data/example.com/file.json swh/core/tests/data/example.com/file.json,name=doe,firstname=jane swh/core/tests/data/example.com/file.json_visit1 swh/core/tests/data/example.com/other.json swh/core/tests/fixture/__init__.py swh/core/tests/fixture/conftest.py swh/core/tests/fixture/test_pytest_plugin.py swh/core/tests/fixture/data/example.com/file.json \ No newline at end of file diff --git a/swh/core/api/__init__.py b/swh/core/api/__init__.py index 363c79f..f52293b 100644 --- a/swh/core/api/__init__.py +++ b/swh/core/api/__init__.py @@ -1,349 +1,338 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import collections +from collections import abc import functools import inspect import json import logging import pickle import requests import datetime from typing import ClassVar, Optional, Type -from deprecated import deprecated from flask import Flask, Request, Response, request, abort from .serializers import (decode_response, encode_data_client as encode_data, msgpack_dumps, msgpack_loads, SWHJSONDecoder) from .negotiation import (Formatter as FormatterBase, Negotiator as NegotiatorBase, negotiate as _negotiate) logger = logging.getLogger(__name__) # support for content negotiation class 
Negotiator(NegotiatorBase): def best_mimetype(self): return request.accept_mimetypes.best_match( self.accept_mimetypes, 'application/json') def _abort(self, status_code, err=None): return abort(status_code, err) def negotiate(formatter_cls, *args, **kwargs): return _negotiate(Negotiator, formatter_cls, *args, **kwargs) class Formatter(FormatterBase): def _make_response(self, body, content_type): return Response(body, content_type=content_type) class SWHJSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, (datetime.datetime, datetime.date)): return obj.isoformat() if isinstance(obj, datetime.timedelta): return str(obj) # Let the base class default method raise the TypeError return super().default(obj) class JSONFormatter(Formatter): format = 'json' mimetypes = ['application/json'] def render(self, obj): return json.dumps(obj, cls=SWHJSONEncoder) class MsgpackFormatter(Formatter): format = 'msgpack' mimetypes = ['application/x-msgpack'] def render(self, obj): return msgpack_dumps(obj) # base API classes class RemoteException(Exception): pass def remote_api_endpoint(path): def dec(f): f._endpoint_path = path return f return dec class APIError(Exception): """API Error""" def __str__(self): return ('An unexpected error occurred in the backend: {}' .format(self.args)) class MetaRPCClient(type): """Metaclass for RPCClient, which adds a method for each endpoint of the database it is designed to access. See for example :class:`swh.indexer.storage.api.client.RemoteStorage`""" def __new__(cls, name, bases, attributes): # For each method wrapped with @remote_api_endpoint in an API backend # (eg. :class:`swh.indexer.storage.IndexerStorage`), add a new # method in RemoteStorage, with the same documentation. # # Note that, despite the usage of decorator magic (eg. functools.wrap), # this never actually calls an IndexerStorage method. 
backend_class = attributes.get('backend_class', None) for base in bases: if backend_class is not None: break backend_class = getattr(base, 'backend_class', None) if backend_class: for (meth_name, meth) in backend_class.__dict__.items(): if hasattr(meth, '_endpoint_path'): cls.__add_endpoint(meth_name, meth, attributes) return super().__new__(cls, name, bases, attributes) @staticmethod def __add_endpoint(meth_name, meth, attributes): wrapped_meth = inspect.unwrap(meth) @functools.wraps(meth) # Copy signature and doc def meth_(*args, **kwargs): # Match arguments and parameters post_data = inspect.getcallargs( wrapped_meth, *args, **kwargs) # Remove arguments that should not be passed self = post_data.pop('self') post_data.pop('cur', None) post_data.pop('db', None) # Send the request. return self.post(meth._endpoint_path, post_data) attributes[meth_name] = meth_ class RPCClient(metaclass=MetaRPCClient): """Proxy to an internal SWH RPC """ backend_class = None # type: ClassVar[Optional[type]] """For each method of `backend_class` decorated with :func:`remote_api_endpoint`, a method with the same prototype and docstring will be added to this class. Calls to this new method will be translated into HTTP requests to a remote server. 
This backend class will never be instantiated, it only serves as a template.""" api_exception = APIError # type: ClassVar[Type[Exception]] """The exception class to raise in case of communication error with the server.""" def __init__(self, url, api_exception=None, timeout=None, chunk_size=4096, **kwargs): if api_exception: self.api_exception = api_exception base_url = url if url.endswith('/') else url + '/' self.url = base_url self.session = requests.Session() adapter = requests.adapters.HTTPAdapter( max_retries=kwargs.get('max_retries', 3), pool_connections=kwargs.get('pool_connections', 20), pool_maxsize=kwargs.get('pool_maxsize', 100)) self.session.mount(self.url, adapter) self.timeout = timeout self.chunk_size = chunk_size def _url(self, endpoint): return '%s%s' % (self.url, endpoint) def raw_verb(self, verb, endpoint, **opts): if 'chunk_size' in opts: # if the chunk_size argument has been passed, consider the user # also wants stream=True, otherwise, what's the point. opts['stream'] = True if self.timeout and 'timeout' not in opts: opts['timeout'] = self.timeout try: return getattr(self.session, verb)( self._url(endpoint), **opts ) except requests.exceptions.ConnectionError as e: raise self.api_exception(e) def post(self, endpoint, data, **opts): - if isinstance(data, (collections.Iterator, collections.Generator)): + if isinstance(data, (abc.Iterator, abc.Generator)): data = (encode_data(x) for x in data) else: data = encode_data(data) chunk_size = opts.pop('chunk_size', self.chunk_size) response = self.raw_verb( 'post', endpoint, data=data, headers={'content-type': 'application/x-msgpack', 'accept': 'application/x-msgpack'}, **opts) if opts.get('stream') or \ response.headers.get('transfer-encoding') == 'chunked': return response.iter_content(chunk_size) else: return self._decode_response(response) post_stream = post def get(self, endpoint, **opts): chunk_size = opts.pop('chunk_size', self.chunk_size) response = self.raw_verb( 'get', endpoint, 
headers={'accept': 'application/x-msgpack'}, **opts) if opts.get('stream') or \ response.headers.get('transfer-encoding') == 'chunked': return response.iter_content(chunk_size) else: return self._decode_response(response) def get_stream(self, endpoint, **opts): return self.get(endpoint, stream=True, **opts) def _decode_response(self, response): if response.status_code == 404: return None if response.status_code == 500: data = decode_response(response) if 'exception_pickled' in data: raise pickle.loads(data['exception_pickled']) else: raise RemoteException(data['exception']) # XXX: this breaks language-independence and should be # replaced by proper unserialization if response.status_code == 400: raise pickle.loads(decode_response(response)) elif response.status_code != 200: raise RemoteException( "Unexpected status code for API request: %s (%s)" % ( response.status_code, response.content, ) ) return decode_response(response) def __repr__(self): return '<{} url={}>'.format(self.__class__.__name__, self.url) class BytesRequest(Request): """Request with proper escaping of arbitrary byte sequences.""" encoding = 'utf-8' encoding_errors = 'surrogateescape' ENCODERS = { 'application/x-msgpack': msgpack_dumps, 'application/json': json.dumps, } def encode_data_server(data, content_type='application/x-msgpack'): encoded_data = ENCODERS[content_type](data) return Response( encoded_data, mimetype=content_type, ) def decode_request(request): content_type = request.mimetype data = request.get_data() if not data: return {} if content_type == 'application/x-msgpack': r = msgpack_loads(data) elif content_type == 'application/json': - r = json.loads(data, cls=SWHJSONDecoder) + # XXX this .decode() is needed for py35. 
+ # Should not be needed any more with py37 + r = json.loads(data.decode('utf-8'), cls=SWHJSONDecoder) else: raise ValueError('Wrong content type `%s` for API request' % content_type) return r def error_handler(exception, encoder): # XXX: this breaks language-independence and should be # replaced by proper serialization of errors logging.exception(exception) response = encoder(pickle.dumps(exception)) response.status_code = 400 return response class RPCServerApp(Flask): """For each endpoint of the given `backend_class`, tells app.route to call a function that decodes the request and sends it to the backend object provided by the factory. - :param Any backend_class: The class of the backend, which will be - analyzed to look for API endpoints. - :param Callable[[], backend_class] backend_factory: A function with no - argument that returns - an instance of - `backend_class`.""" + :param Any backend_class: + The class of the backend, which will be analyzed to look + for API endpoints. + :param Optional[Callable[[], backend_class]] backend_factory: + A function with no argument that returns an instance of + `backend_class`. If unset, defaults to calling `backend_class` + constructor directly. 
+ """ request_class = BytesRequest def __init__(self, *args, backend_class=None, backend_factory=None, **kwargs): super().__init__(*args, **kwargs) + self.backend_class = backend_class if backend_class is not None: if backend_factory is None: - raise TypeError('Missing argument backend_factory') + backend_factory = backend_class for (meth_name, meth) in backend_class.__dict__.items(): if hasattr(meth, '_endpoint_path'): self.__add_endpoint(meth_name, meth, backend_factory) def __add_endpoint(self, meth_name, meth, backend_factory): from flask import request @self.route('/'+meth._endpoint_path, methods=['POST']) + @negotiate(MsgpackFormatter) + @negotiate(JSONFormatter) @functools.wraps(meth) # Copy signature and doc def _f(): # Call the actual code obj_meth = getattr(backend_factory(), meth_name) - return encode_data_server(obj_meth(**decode_request(request))) - - -@deprecated(version='0.0.64', - reason='Use the RPCServerApp instead') -class SWHServerAPIApp(RPCServerApp): - pass - - -@deprecated(version='0.0.64', - reason='Use the MetaRPCClient instead') -class MetaSWHRemoteAPI(MetaRPCClient): - pass - - -@deprecated(version='0.0.64', - reason='Use the RPCClient instead') -class SWHRemoteAPI(RPCClient): - pass + kw = decode_request(request) + return obj_meth(**kw) diff --git a/swh/core/api/tests/test_api.py b/swh/core/api/tests/test_api.py deleted file mode 100644 index 32180f8..0000000 --- a/swh/core/api/tests/test_api.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (C) 2018 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -import requests_mock -from werkzeug.wrappers import BaseResponse -from werkzeug.test import Client as WerkzeugTestClient - -from swh.core.api import ( - error_handler, encode_data_server, - remote_api_endpoint, RPCClient, RPCServerApp) - - -class 
ApiTest(unittest.TestCase): - def test_server(self): - testcase = self - nb_endpoint_calls = 0 - - class TestStorage: - @remote_api_endpoint('test_endpoint_url') - def test_endpoint(self, test_data, db=None, cur=None): - nonlocal nb_endpoint_calls - nb_endpoint_calls += 1 - - testcase.assertEqual(test_data, 'spam') - return 'egg' - - app = RPCServerApp('testapp', - backend_class=TestStorage, - backend_factory=lambda: TestStorage()) - - @app.errorhandler(Exception) - def my_error_handler(exception): - return error_handler(exception, encode_data_server) - - client = WerkzeugTestClient(app, BaseResponse) - res = client.post('/test_endpoint_url', - headers={'Content-Type': 'application/x-msgpack'}, - data=b'\x81\xa9test_data\xa4spam') - - self.assertEqual(nb_endpoint_calls, 1) - self.assertEqual(b''.join(res.response), b'\xa3egg') - - def test_client(self): - class TestStorage: - @remote_api_endpoint('test_endpoint_url') - def test_endpoint(self, test_data, db=None, cur=None): - pass - - nb_http_calls = 0 - - def callback(request, context): - nonlocal nb_http_calls - nb_http_calls += 1 - self.assertEqual(request.headers['Content-Type'], - 'application/x-msgpack') - self.assertEqual(request.body, b'\x81\xa9test_data\xa4spam') - context.headers['Content-Type'] = 'application/x-msgpack' - context.content = b'\xa3egg' - return b'\xa3egg' - - adapter = requests_mock.Adapter() - adapter.register_uri('POST', - 'mock://example.com/test_endpoint_url', - content=callback) - - class Testclient(RPCClient): - backend_class = TestStorage - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # we need to mount the mock adapter on the base url to override - # RPCClient's mechanism that also mounts an HTTPAdapter - # (for configuration purpose) - self.session.mount('mock://example.com/', adapter) - - c = Testclient(url='mock://example.com/') - res = c.test_endpoint('spam') - - self.assertEqual(nb_http_calls, 1) - self.assertEqual(res, 'egg') diff --git 
a/swh/core/api/tests/test_async.py b/swh/core/api/tests/test_async.py index 2de1ced..5086b59 100644 --- a/swh/core/api/tests/test_async.py +++ b/swh/core/api/tests/test_async.py @@ -1,186 +1,186 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import msgpack import json import pytest from swh.core.api.asynchronous import RPCServerApp, Response from swh.core.api.asynchronous import encode_msgpack, decode_request from swh.core.api.serializers import msgpack_dumps, SWHJSONEncoder pytest_plugins = ['aiohttp.pytest_plugin', 'pytester'] async def root(request): return Response('toor') STRUCT = {'txt': 'something stupid', # 'date': datetime.date(2019, 6, 9), # not supported 'datetime': datetime.datetime(2019, 6, 9, 10, 12), 'timedelta': datetime.timedelta(days=-2, hours=3), 'int': 42, 'float': 3.14, 'subdata': {'int': 42, 'datetime': datetime.datetime(2019, 6, 10, 11, 12), }, 'list': [42, datetime.datetime(2019, 9, 10, 11, 12), 'ok'], } async def struct(request): return Response(STRUCT) async def echo(request): data = await decode_request(request) return Response(data) async def echo_no_nego(request): # let the content negotiation handle the serialization for us... 
data = await decode_request(request) ret = encode_msgpack(data) return ret def check_mimetype(src, dst): src = src.split(';')[0].strip() dst = dst.split(';')[0].strip() assert src == dst @pytest.fixture -def app(): +def async_app(): app = RPCServerApp() app.router.add_route('GET', '/', root) app.router.add_route('GET', '/struct', struct) app.router.add_route('POST', '/echo', echo) app.router.add_route('POST', '/echo-no-nego', echo_no_nego) return app -async def test_get_simple(app, aiohttp_client) -> None: - assert app is not None +async def test_get_simple(async_app, aiohttp_client) -> None: + assert async_app is not None - cli = await aiohttp_client(app) + cli = await aiohttp_client(async_app) resp = await cli.get('/') assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/x-msgpack') data = await resp.read() value = msgpack.unpackb(data, raw=False) assert value == 'toor' -async def test_get_simple_nego(app, aiohttp_client) -> None: - cli = await aiohttp_client(app) +async def test_get_simple_nego(async_app, aiohttp_client) -> None: + cli = await aiohttp_client(async_app) for ctype in ('x-msgpack', 'json'): resp = await cli.get('/', headers={'Accept': 'application/%s' % ctype}) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/%s' % ctype) assert (await decode_request(resp)) == 'toor' -async def test_get_struct(app, aiohttp_client) -> None: +async def test_get_struct(async_app, aiohttp_client) -> None: """Test returned structured from a simple GET data is OK""" - cli = await aiohttp_client(app) + cli = await aiohttp_client(async_app) resp = await cli.get('/struct') assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/x-msgpack') assert (await decode_request(resp)) == STRUCT -async def test_get_struct_nego(app, aiohttp_client) -> None: +async def test_get_struct_nego(async_app, aiohttp_client) -> None: """Test returned structured from a simple GET data is OK""" - cli = 
await aiohttp_client(app) + cli = await aiohttp_client(async_app) for ctype in ('x-msgpack', 'json'): resp = await cli.get('/struct', headers={'Accept': 'application/%s' % ctype}) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/%s' % ctype) assert (await decode_request(resp)) == STRUCT -async def test_post_struct_msgpack(app, aiohttp_client) -> None: +async def test_post_struct_msgpack(async_app, aiohttp_client) -> None: """Test that msgpack encoded posted struct data is returned as is""" - cli = await aiohttp_client(app) + cli = await aiohttp_client(async_app) # simple struct resp = await cli.post( '/echo', headers={'Content-Type': 'application/x-msgpack'}, data=msgpack_dumps({'toto': 42})) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/x-msgpack') assert (await decode_request(resp)) == {'toto': 42} # complex struct resp = await cli.post( '/echo', headers={'Content-Type': 'application/x-msgpack'}, data=msgpack_dumps(STRUCT)) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/x-msgpack') assert (await decode_request(resp)) == STRUCT -async def test_post_struct_json(app, aiohttp_client) -> None: +async def test_post_struct_json(async_app, aiohttp_client) -> None: """Test that json encoded posted struct data is returned as is""" - cli = await aiohttp_client(app) + cli = await aiohttp_client(async_app) resp = await cli.post( '/echo', headers={'Content-Type': 'application/json'}, data=json.dumps({'toto': 42}, cls=SWHJSONEncoder)) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/x-msgpack') assert (await decode_request(resp)) == {'toto': 42} resp = await cli.post( '/echo', headers={'Content-Type': 'application/json'}, data=json.dumps(STRUCT, cls=SWHJSONEncoder)) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/x-msgpack') # assert resp.headers['Content-Type'] == 'application/x-msgpack' assert 
(await decode_request(resp)) == STRUCT -async def test_post_struct_nego(app, aiohttp_client) -> None: +async def test_post_struct_nego(async_app, aiohttp_client) -> None: """Test that json encoded posted struct data is returned as is using content negotiation (accept json or msgpack). """ - cli = await aiohttp_client(app) + cli = await aiohttp_client(async_app) for ctype in ('x-msgpack', 'json'): resp = await cli.post( '/echo', headers={'Content-Type': 'application/json', 'Accept': 'application/%s' % ctype}, data=json.dumps(STRUCT, cls=SWHJSONEncoder)) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/%s' % ctype) assert (await decode_request(resp)) == STRUCT -async def test_post_struct_no_nego(app, aiohttp_client) -> None: +async def test_post_struct_no_nego(async_app, aiohttp_client) -> None: """Test that json encoded posted struct data is returned as msgpack when using non-negotiation-compatible handlers. """ - cli = await aiohttp_client(app) + cli = await aiohttp_client(async_app) for ctype in ('x-msgpack', 'json'): resp = await cli.post( '/echo-no-nego', headers={'Content-Type': 'application/json', 'Accept': 'application/%s' % ctype}, data=json.dumps(STRUCT, cls=SWHJSONEncoder)) assert resp.status == 200 check_mimetype(resp.headers['Content-Type'], 'application/x-msgpack') assert (await decode_request(resp)) == STRUCT diff --git a/swh/core/api/tests/test_rpc_client.py b/swh/core/api/tests/test_rpc_client.py new file mode 100644 index 0000000..307a5e7 --- /dev/null +++ b/swh/core/api/tests/test_rpc_client.py @@ -0,0 +1,56 @@ +# Copyright (C) 2018-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import re +import pytest + +from swh.core.api import remote_api_endpoint, RPCClient + + +@pytest.fixture +def rpc_client(requests_mock): + class 
TestStorage: + @remote_api_endpoint('test_endpoint_url') + def test_endpoint(self, test_data, db=None, cur=None): + return 'egg' + + @remote_api_endpoint('path/to/endpoint') + def something(self, data, db=None, cur=None): + return 'spam' + + class Testclient(RPCClient): + backend_class = TestStorage + + def callback(request, context): + assert request.headers['Content-Type'] == 'application/x-msgpack' + context.headers['Content-Type'] = 'application/x-msgpack' + if request.path == '/test_endpoint_url': + context.content = b'\xa3egg' + elif request.path == '/path/to/endpoint': + context.content = b'\xa4spam' + else: + assert False + return context.content + + requests_mock.post(re.compile('mock://example.com/'), + content=callback) + + return Testclient(url='mock://example.com') + + +def test_client(rpc_client): + + assert hasattr(rpc_client, 'test_endpoint') + assert hasattr(rpc_client, 'something') + + res = rpc_client.test_endpoint('spam') + assert res == 'egg' + res = rpc_client.test_endpoint(test_data='spam') + assert res == 'egg' + + res = rpc_client.something('whatever') + assert res == 'spam' + res = rpc_client.something(data='whatever') + assert res == 'spam' diff --git a/swh/core/api/tests/test_rpc_client_server.py b/swh/core/api/tests/test_rpc_client_server.py new file mode 100644 index 0000000..f1a15ea --- /dev/null +++ b/swh/core/api/tests/test_rpc_client_server.py @@ -0,0 +1,89 @@ +# Copyright (C) 2018-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + +from swh.core.api import remote_api_endpoint, RPCServerApp, RPCClient +from swh.core.api import error_handler, encode_data_server + + +# this class is used on the server part +class RPCTest: + @remote_api_endpoint('endpoint_url') + def endpoint(self, test_data, db=None, cur=None): + assert test_data == 
'spam' + return 'egg' + + @remote_api_endpoint('path/to/endpoint') + def something(self, data, db=None, cur=None): + return data + + +# this class is used on the client part. We cannot inherit from RPCTest +# because the automagic metaclass based code that generates the RPCClient +# proxy class from this does not handle inheritance properly. +# We do add an endpoint on the client side that has no implementation +# server-side to test this very situation (it should generate a 404) +class RPCTest2: + @remote_api_endpoint('endpoint_url') + def endpoint(self, test_data, db=None, cur=None): + assert test_data == 'spam' + return 'egg' + + @remote_api_endpoint('path/to/endpoint') + def something(self, data, db=None, cur=None): + return data + + @remote_api_endpoint('not_on_server') + def not_on_server(self, db=None, cur=None): + return 'ok' + + +class RPCTestClient(RPCClient): + backend_class = RPCTest2 + + +@pytest.fixture +def app(): + # This fixture is used by the 'swh_rpc_adapter' fixture + # which is defined in swh/core/pytest_plugin.py + application = RPCServerApp('testapp', backend_class=RPCTest) + @application.errorhandler(Exception) + def my_error_handler(exception): + return error_handler(exception, encode_data_server) + return application + + +@pytest.fixture +def swh_rpc_client_class(): + # This fixture is used by the 'swh_rpc_client' fixture + # which is defined in swh/core/pytest_plugin.py + return RPCTestClient + + +def test_api_client_endpoint_missing(swh_rpc_client): + with pytest.raises(AttributeError): + swh_rpc_client.missing(data='whatever') + + +def test_api_server_endpoint_missing(swh_rpc_client): + # A 'missing' endpoint (server-side) should raise an exception + # due to a 404, since in the end we do a GET/POST on a nonexistent URL + with pytest.raises(Exception, match='404 Not Found'): + swh_rpc_client.not_on_server() + + +def test_api_endpoint_kwargs(swh_rpc_client): + res = swh_rpc_client.something(data='whatever') + assert res == 'whatever' + res 
= swh_rpc_client.endpoint(test_data='spam') + assert res == 'egg' + + +def test_api_endpoint_args(swh_rpc_client): + res = swh_rpc_client.something('whatever') + assert res == 'whatever' + res = swh_rpc_client.endpoint('spam') + assert res == 'egg' diff --git a/swh/core/api/tests/test_rpc_server.py b/swh/core/api/tests/test_rpc_server.py new file mode 100644 index 0000000..9399f62 --- /dev/null +++ b/swh/core/api/tests/test_rpc_server.py @@ -0,0 +1,73 @@ +# Copyright (C) 2018-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest +import json +import msgpack + +from flask import url_for + +from swh.core.api import remote_api_endpoint, RPCServerApp + + +@pytest.fixture +def app(): + class TestStorage: + @remote_api_endpoint('test_endpoint_url') + def test_endpoint(self, test_data, db=None, cur=None): + assert test_data == 'spam' + return 'egg' + + @remote_api_endpoint('path/to/endpoint') + def something(self, data, db=None, cur=None): + return data + + return RPCServerApp('testapp', backend_class=TestStorage) + + +def test_api_endpoint(flask_app_client): + res = flask_app_client.post( + url_for('something'), + headers=[('Content-Type', 'application/json'), + ('Accept', 'application/json')], + data=json.dumps({'data': 'toto'}), + ) + assert res.status_code == 200 + assert res.mimetype == 'application/json' + + +def test_api_nego_default(flask_app_client): + res = flask_app_client.post( + url_for('something'), + headers=[('Content-Type', 'application/json')], + data=json.dumps({'data': 'toto'}), + ) + assert res.status_code == 200 + assert res.mimetype == 'application/json' + assert res.data == b'"toto"' + + +def test_api_nego_accept(flask_app_client): + res = flask_app_client.post( + url_for('something'), + headers=[('Accept', 'application/x-msgpack'), + ('Content-Type', 
'application/x-msgpack')], + data=msgpack.dumps({'data': 'toto'}), + ) + assert res.status_code == 200 + assert res.mimetype == 'application/x-msgpack' + assert res.data == b'\xa4toto' + + +def test_rpc_server(flask_app_client): + res = flask_app_client.post( + url_for('test_endpoint'), + headers=[('Content-Type', 'application/x-msgpack'), + ('Accept', 'application/x-msgpack')], + data=b'\x81\xa9test_data\xa4spam') + + assert res.status_code == 200 + assert res.mimetype == 'application/x-msgpack' + assert res.data == b'\xa3egg' diff --git a/swh/core/pytest_plugin.py b/swh/core/pytest_plugin.py index 9521c59..00e44e8 100644 --- a/swh/core/pytest_plugin.py +++ b/swh/core/pytest_plugin.py @@ -1,178 +1,294 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import re import pytest +import requests from functools import partial from os import path from typing import Dict, List, Optional from urllib.parse import urlparse +from requests.adapters import BaseAdapter +from requests.structures import CaseInsensitiveDict +from requests.utils import get_encoding_from_headers + logger = logging.getLogger(__name__) # Check get_local_factory function # Maximum number of iteration checks to generate requests responses MAX_VISIT_FILES = 10 def get_response_cb(request, context, datadir, ignore_urls: List[str] = [], visits: Optional[Dict] = None): """Mount point callback to fetch on disk the request's content. This is meant to be used as 'body' argument of the requests_mock.get() method. It will look for files on the local filesystem based on the requested URL, using the following rules: - files are searched in the datadir/ directory - the local file name is the path part of the URL with path hierarchy markers (aka '/') replaced by '_' Eg. 
if you use the requests_mock fixture in your test file as: requests_mock.get('https://nowhere.com', body=get_response_cb) # or even requests_mock.get(re.compile('https://'), body=get_response_cb) then a call requests.get like: requests.get('https://nowhere.com/path/to/resource?a=b&c=d') will look the content of the response in: datadir/nowhere.com/path_to_resource,a=b,c=d Args: request (requests.Request): Object requests context (requests.Context): Object holding response metadata information (status_code, headers, etc...) ignore_urls: urls whose status response should be 404 even if the local file exists visits: Dict of url, number of visits. If None, disable multi visit support (default) Returns: Optional[FileDescriptor] on disk file to read from the test context """ logger.debug('get_response_cb(%s, %s)', request, context) logger.debug('url: %s', request.url) logger.debug('ignore_urls: %s', ignore_urls) if request.url in ignore_urls: context.status_code = 404 return None url = urlparse(request.url) dirname = url.hostname # pypi.org | files.pythonhosted.org # url.path: pypi//json -> local file: pypi__json filename = url.path[1:] if filename.endswith('/'): filename = filename[:-1] filename = filename.replace('/', '_') if url.query: filename += ',' + url.query.replace('&', ',') filepath = path.join(datadir, dirname, filename) if visits is not None: visit = visits.get(url, 0) visits[url] = visit + 1 if visit: filepath = filepath + '_visit%s' % visit if not path.isfile(filepath): logger.debug('not found filepath: %s', filepath) context.status_code = 404 return None fd = open(filepath, 'rb') context.headers['content-length'] = str(path.getsize(filepath)) return fd @pytest.fixture def datadir(request): """By default, returns the test directory's data directory. - This can be overriden on a per arborescence basis. Add an override + This can be overridden on a per arborescence basis. 
Add an override definition in the local conftest, for example: import pytest from os import path @pytest.fixture def datadir(): return path.join(path.abspath(path.dirname(__file__)), 'resources') """ return path.join(path.dirname(str(request.fspath)), 'data') def requests_mock_datadir_factory(ignore_urls: List[str] = [], has_multi_visit: bool = False): """This factory generates fixture which allow to look for files on the local filesystem based on the requested URL, using the following rules: - files are searched in the datadir/ directory - the local file name is the path part of the URL with path hierarchy markers (aka '/') replaced by '_' Multiple implementations are possible, for example: - requests_mock_datadir_factory([]): This computes the file name from the query and always returns the same result. - requests_mock_datadir_factory(has_multi_visit=True): This computes the file name from the query and returns the content of the filename the first time, the next call returning the content of files suffixed with _visit1 and so on and so forth. If the file is not found, returns a 404. - requests_mock_datadir_factory(ignore_urls=['url1', 'url2']): This will ignore any files corresponding to url1 and url2, always returning 404. 
Args: ignore_urls: List of urls to always returns 404 (whether file exists or not) has_multi_visit: Activate or not the multiple visits behavior """ @pytest.fixture def requests_mock_datadir(requests_mock, datadir): if not has_multi_visit: cb = partial(get_response_cb, ignore_urls=ignore_urls, datadir=datadir) requests_mock.get(re.compile('https://'), body=cb) else: visits = {} requests_mock.get(re.compile('https://'), body=partial( get_response_cb, ignore_urls=ignore_urls, visits=visits, datadir=datadir) ) return requests_mock return requests_mock_datadir # Default `requests_mock_datadir` implementation requests_mock_datadir = requests_mock_datadir_factory([]) # Implementation for multiple visits behavior: # - first time, it checks for a file named `filename` # - second time, it checks for a file named `filename`_visit1 # etc... requests_mock_datadir_visits = requests_mock_datadir_factory( has_multi_visit=True) + + +@pytest.fixture +def swh_rpc_client(swh_rpc_client_class, swh_rpc_adapter): + """This fixture generates an RPCClient instance that uses the class generated + by the rpc_client_class fixture as backend. + + Since it uses the swh_rpc_adapter, HTTP queries will be intercepted and + routed directly to the current Flask app (as provided by the `app` + fixture). + + So this stack of fixtures allows to test the RPCClient -> RPCServerApp + communication path using a real RPCClient instance and a real Flask + (RPCServerApp) app instance. + + To use this fixture: + + - ensure an `app` fixture exists and generate a Flask application, + - implement an `swh_rpc_client_class` fixtures that returns the + RPCClient-based class to use as client side for the tests, + - implement your tests using this `swh_rpc_client` fixture. + + See swh/core/api/tests/test_rpc_client_server.py for an example of usage. 
+ """ + url = 'mock://example.com' + cli = swh_rpc_client_class(url=url) + # we need to clear the list of existing adapters here so we ensure we + # have one and only one adapter which is then used for all the requests. + cli.session.adapters.clear() + cli.session.mount('mock://', swh_rpc_adapter) + return cli + + +@pytest.yield_fixture +def swh_rpc_adapter(app): + """Fixture that generates a requests.Adapter instance that + can be used to test client/servers code based on swh.core.api classes. + + See swh/core/api/tests/test_rpc_client_server.py for an example of usage. + + """ + with app.test_client() as client: + yield RPCTestAdapter(client) + + +class RPCTestAdapter(BaseAdapter): + def __init__(self, client): + self._client = client + + def build_response(self, req, resp): + response = requests.Response() + + # Fallback to None if there's no status_code, for whatever reason. + response.status_code = resp.status_code + + # Make headers case-insensitive. + response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})) + + # Set encoding. + response.encoding = get_encoding_from_headers(response.headers) + response.raw = resp + response.reason = response.raw.status + + if isinstance(req.url, bytes): + response.url = req.url.decode('utf-8') + else: + response.url = req.url + + # Give the Response some context. + response.request = req + response.connection = self + response._content = resp.data + + return response + + def send(self, request, **kw): + resp = self._client.open( + request.url, method=request.method, + headers=request.headers.items(), + data=request.body, + ) + return self.build_response(request, resp) + + +@pytest.yield_fixture +def flask_app_client(app): + with app.test_client() as client: + yield client + + +# stolen from pytest-flask, required to have url_for() working within tests +# using flask_app_client fixture. 
+@pytest.fixture(autouse=True) +def _push_request_context(request): + """During tests execution request context has been pushed, e.g. `url_for`, + `session`, etc. can be used in tests as is:: + + def test_app(app, client): + assert client.get(url_for('myview')).status_code == 200 + + """ + if 'app' not in request.fixturenames: + return + app = request.getfixturevalue('app') + ctx = app.test_request_context() + ctx.push() + + def teardown(): + ctx.pop() + + request.addfinalizer(teardown) diff --git a/version.txt b/version.txt index c9fea85..defad07 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.73-0-gf884d1d \ No newline at end of file +v0.0.74-0-g2acfa74 \ No newline at end of file