diff --git a/PKG-INFO b/PKG-INFO index 6ebb89b..1a859ee 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,28 +1,28 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.50 +Version: 0.0.51 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Funding, https://www.softwareheritage.org/donate -Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/requirements.txt b/requirements.txt index 594d549..0b91831 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,11 @@ arrow aiohttp msgpack-python psycopg2 python-dateutil vcversioner PyYAML requests Flask systemd-python +decorator diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index 6ebb89b..1a859ee 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,28 +1,28 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.50 +Version: 0.0.51 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Funding, https://www.softwareheritage.org/donate -Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt index 420548c..284d9da 100644 --- a/swh.core.egg-info/SOURCES.txt +++ b/swh.core.egg-info/SOURCES.txt @@ -1,34 +1,39 @@ MANIFEST.in Makefile README.md requirements-swh.txt requirements.txt setup.py version.txt swh/__init__.py swh.core.egg-info/PKG-INFO swh.core.egg-info/SOURCES.txt swh.core.egg-info/dependency_links.txt swh.core.egg-info/entry_points.txt swh.core.egg-info/requires.txt swh.core.egg-info/top_level.txt swh/core/__init__.py -swh/core/api.py swh/core/api_async.py swh/core/cli.py swh/core/config.py swh/core/logger.py -swh/core/serializers.py swh/core/statsd.py swh/core/tarball.py swh/core/utils.py +swh/core/api/__init__.py +swh/core/api/asynchronous.py +swh/core/api/negotiate.py +swh/core/api/serializers.py +swh/core/db/__init__.py +swh/core/db/common.py +swh/core/db/db_utils.py swh/core/sql/log-schema.sql swh/core/tests/__init__.py swh/core/tests/db_testing.py swh/core/tests/server_testing.py swh/core/tests/test_api.py swh/core/tests/test_config.py swh/core/tests/test_logger.py swh/core/tests/test_serializers.py swh/core/tests/test_statsd.py swh/core/tests/test_utils.py \ No newline at end of file diff --git a/swh.core.egg-info/requires.txt b/swh.core.egg-info/requires.txt index 25ae0f0..ca675a9 100644 --- a/swh.core.egg-info/requires.txt +++ b/swh.core.egg-info/requires.txt @@ -1,14 +1,16 @@ arrow aiohttp msgpack-python psycopg2 python-dateutil vcversioner PyYAML requests Flask systemd-python +decorator [testing] -pytest +pytest<4 +pytest-postgresql requests-mock diff --git a/swh/core/api.py b/swh/core/api/__init__.py similarity index 78% rename from swh/core/api.py rename to swh/core/api/__init__.py index a377781..a62316a 100644 --- a/swh/core/api.py +++ b/swh/core/api/__init__.py @@ -1,237 +1,309 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import collections import functools import inspect import json import logging import pickle import requests +import datetime -from flask import Flask, Request, Response +from flask import Flask, Request, Response, request, abort from .serializers import (decode_response, encode_data_client as encode_data, msgpack_dumps, msgpack_loads, SWHJSONDecoder) +from .negotiate import (Formatter as FormatterBase, + Negotiator as NegotiatorBase, + negotiate as _negotiate) + + +logger = logging.getLogger(__name__) + + +# support for content negotation + +class Negotiator(NegotiatorBase): + def best_mimetype(self): + return request.accept_mimetypes.best_match( + self.accept_mimetypes, 'text/html') + + def _abort(self, status_code, err=None): + return abort(status_code, err) + + +def negotiate(formatter_cls, *args, **kwargs): + return _negotiate(Negotiator, formatter_cls, *args, **kwargs) + + +class Formatter(FormatterBase): + def _make_response(self, body, content_type): + return Response(body, content_type=content_type) + + +class SWHJSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + if isinstance(obj, datetime.timedelta): + return str(obj) + # Let the base class default method raise the TypeError + return super().default(obj) + + +class JSONFormatter(Formatter): + format = 'json' + mimetypes = ['application/json'] + + def render(self, obj): + return json.dumps(obj, cls=SWHJSONEncoder) + + +class MsgpackFormatter(Formatter): + format = 'msgpack' + mimetypes = ['application/x-msgpack'] + + def render(self, obj): + return msgpack_dumps(obj) + + +# base API classes class RemoteException(Exception): pass def remote_api_endpoint(path): def dec(f): f._endpoint_path = path return f return dec class MetaSWHRemoteAPI(type): """Metaclass for SWHRemoteAPI, which adds a method for each endpoint of the database it is designed to access. See for example :class:`swh.indexer.storage.api.client.RemoteStorage`""" def __new__(cls, name, bases, attributes): # For each method wrapped with @remote_api_endpoint in an API backend # (eg. :class:`swh.indexer.storage.IndexerStorage`), add a new # method in RemoteStorage, with the same documentation. # # Note that, despite the usage of decorator magic (eg. functools.wrap), # this never actually calls an IndexerStorage method. backend_class = attributes.get('backend_class', None) for base in bases: if backend_class is not None: break backend_class = getattr(base, 'backend_class', None) if backend_class: for (meth_name, meth) in backend_class.__dict__.items(): if hasattr(meth, '_endpoint_path'): cls.__add_endpoint(meth_name, meth, attributes) return super().__new__(cls, name, bases, attributes) @staticmethod def __add_endpoint(meth_name, meth, attributes): wrapped_meth = inspect.unwrap(meth) @functools.wraps(meth) # Copy signature and doc def meth_(*args, **kwargs): # Match arguments and parameters post_data = inspect.getcallargs( wrapped_meth, *args, **kwargs) # Remove arguments that should not be passed self = post_data.pop('self') post_data.pop('cur', None) post_data.pop('db', None) # Send the request. return self.post(meth._endpoint_path, post_data) attributes[meth_name] = meth_ class SWHRemoteAPI(metaclass=MetaSWHRemoteAPI): """Proxy to an internal SWH API """ backend_class = None """For each method of `backend_class` decorated with :func:`remote_api_endpoint`, a method with the same prototype and docstring will be added to this class. Calls to this new method will be translated into HTTP requests to a remote server. This backend class will never be instantiated, it only serves as a template.""" def __init__(self, api_exception, url, timeout=None): super().__init__() self.api_exception = api_exception base_url = url if url.endswith('/') else url + '/' self.url = base_url self.session = requests.Session() self.timeout = timeout def _url(self, endpoint): return '%s%s' % (self.url, endpoint) def raw_post(self, endpoint, data, **opts): if self.timeout and 'timeout' not in opts: opts['timeout'] = self.timeout try: return self.session.post( self._url(endpoint), data=data, **opts ) except requests.exceptions.ConnectionError as e: raise self.api_exception(e) def raw_get(self, endpoint, params=None, **opts): if self.timeout and 'timeout' not in opts: opts['timeout'] = self.timeout try: return self.session.get( self._url(endpoint), params=params, **opts ) except requests.exceptions.ConnectionError as e: raise self.api_exception(e) def post(self, endpoint, data, params=None): data = encode_data(data) response = self.raw_post( endpoint, data, params=params, - headers={'content-type': 'application/x-msgpack'}) + headers={'content-type': 'application/x-msgpack', + 'accept': 'application/x-msgpack'}) return self._decode_response(response) def get(self, endpoint, params=None): - response = self.raw_get(endpoint, params=params) + response = self.raw_get( + endpoint, params=params, + headers={'accept': 'application/x-msgpack'}) return self._decode_response(response) def post_stream(self, endpoint, data, params=None): if not isinstance(data, collections.Iterable): raise ValueError("`data` must be Iterable") - response = self.raw_post(endpoint, data, params=params) + response = self.raw_post( + endpoint, data, params=params, + headers={'accept': 'application/x-msgpack'}) + return self._decode_response(response) def get_stream(self, endpoint, params=None, chunk_size=4096): - response = self.raw_get(endpoint, params=params, stream=True) + response = self.raw_get(endpoint, params=params, stream=True, + headers={'accept': 'application/x-msgpack'}) return response.iter_content(chunk_size) def _decode_response(self, response): if response.status_code == 404: return None if response.status_code == 500: data = decode_response(response) if 'exception_pickled' in data: raise pickle.loads(data['exception_pickled']) else: raise RemoteException(data['exception']) # XXX: this breaks language-independence and should be # replaced by proper unserialization if response.status_code == 400: raise pickle.loads(decode_response(response)) elif response.status_code != 200: raise RemoteException( "Unexpected status code for API request: %s (%s)" % ( response.status_code, response.content, ) ) return decode_response(response) class BytesRequest(Request): """Request with proper escaping of arbitrary byte sequences.""" encoding = 'utf-8' encoding_errors = 'surrogateescape' -def encode_data_server(data): +ENCODERS = { + 'application/x-msgpack': msgpack_dumps, + 'application/json': json.dumps, +} + + +def encode_data_server(data, content_type='application/x-msgpack'): + encoded_data = ENCODERS[content_type](data) return Response( - msgpack_dumps(data), - mimetype='application/x-msgpack', - ) + encoded_data, + mimetype=content_type, + ) def decode_request(request): content_type = request.mimetype data = request.get_data() + if not data: + return {} if content_type == 'application/x-msgpack': r = msgpack_loads(data) elif content_type == 'application/json': r = json.loads(data, cls=SWHJSONDecoder) else: raise ValueError('Wrong content type `%s` for API request' % content_type) return r def error_handler(exception, encoder): # XXX: this breaks language-independence and should be # replaced by proper serialization of errors logging.exception(exception) response = encoder(pickle.dumps(exception)) response.status_code = 400 return response class SWHServerAPIApp(Flask): """For each endpoint of the given `backend_class`, tells app.route to call a function that decodes the request and sends it to the backend object provided by the factory. :param Any backend_class: The class of the backend, which will be analyzed to look for API endpoints. :param Callable[[], backend_class] backend_factory: A function with no argument that returns an instance of `backend_class`.""" request_class = BytesRequest def __init__(self, *args, backend_class=None, backend_factory=None, **kwargs): super().__init__(*args, **kwargs) if backend_class is not None: if backend_factory is None: raise TypeError('Missing argument backend_factory') for (meth_name, meth) in backend_class.__dict__.items(): if hasattr(meth, '_endpoint_path'): self.__add_endpoint(meth_name, meth, backend_factory) def __add_endpoint(self, meth_name, meth, backend_factory): from flask import request @self.route('/'+meth._endpoint_path, methods=['POST']) @functools.wraps(meth) # Copy signature and doc def _f(): # Call the actual code obj_meth = getattr(backend_factory(), meth_name) return encode_data_server(obj_meth(**decode_request(request))) diff --git a/swh/core/api_async.py b/swh/core/api/asynchronous.py similarity index 97% copy from swh/core/api_async.py copy to swh/core/api/asynchronous.py index eed981e..7c717c8 100644 --- a/swh/core/api_async.py +++ b/swh/core/api/asynchronous.py @@ -1,56 +1,57 @@ import aiohttp.web import asyncio import json import logging import multidict import pickle import sys import traceback from .serializers import msgpack_dumps, msgpack_loads, SWHJSONDecoder def encode_data_server(data, **kwargs): return aiohttp.web.Response( body=msgpack_dumps(data), headers=multidict.MultiDict({'Content-Type': 'application/x-msgpack'}), **kwargs ) @asyncio.coroutine def decode_request(request): content_type = request.headers.get('Content-Type') data = yield from request.read() - + if not data: + return {} if content_type == 'application/x-msgpack': r = msgpack_loads(data) elif content_type == 'application/json': r = json.loads(data, cls=SWHJSONDecoder) else: raise ValueError('Wrong content type `%s` for API request' % content_type) return r @asyncio.coroutine def error_middleware(app, handler): @asyncio.coroutine def middleware_handler(request): try: return (yield from handler(request)) except Exception as e: if isinstance(e, aiohttp.web.HTTPException): raise logging.exception(e) exception = traceback.format_exception(*sys.exc_info()) res = {'exception': exception, 'exception_pickled': pickle.dumps(e)} return encode_data_server(res, status=500) return middleware_handler class SWHRemoteAPI(aiohttp.web.Application): def __init__(self, *args, middlewares=(), **kwargs): middlewares = (error_middleware,) + middlewares super().__init__(*args, middlewares=middlewares, **kwargs) diff --git a/swh/core/api/negotiate.py b/swh/core/api/negotiate.py new file mode 100644 index 0000000..3fce1b6 --- /dev/null +++ b/swh/core/api/negotiate.py @@ -0,0 +1,152 @@ +# This code is a partial and adapted copy of +# https://github.com/nickstenning/negotiate +# +# Copyright 2012-2013 Nick Stenning +# 2019 The Software Heritage developers +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +from collections import defaultdict +from decorator import decorator + +from inspect import getcallargs + + +class FormatterNotFound(Exception): + pass + + +class Formatter: + format = None + mimetypes = [] + + def __init__(self, request_mimetype=None): + if request_mimetype is None or request_mimetype not in self.mimetypes: + try: + self.response_mimetype = self.mimetypes[0] + except IndexError: + raise NotImplementedError( + "%s.mimetypes should be a non-empty list" % + self.__class__.__name__) + else: + self.response_mimetype = request_mimetype + + def configure(self): + pass + + def render(self, obj): + raise NotImplementedError( + "render() should be implemented by Formatter subclasses") + + def __call__(self, obj): + return self._make_response( + self.render(obj), content_type=self.response_mimetype) + + def _make_response(self, body, content_type): + raise NotImplementedError( + "_make_response() should be implemented by " + "framework-specific subclasses of Formatter" + ) + + +class Negotiator: + + def __init__(self, func): + self.func = func + self._formatters = [] + self._formatters_by_format = defaultdict(list) + self._formatters_by_mimetype = defaultdict(list) + + def __call__(self, *args, **kwargs): + result = self.func(*args, **kwargs) + format = getcallargs(self.func, *args, **kwargs).get('format') + mimetype = self.best_mimetype() + + try: + formatter = self.get_formatter(format, mimetype) + except FormatterNotFound as e: + return self._abort(404, str(e)) + + return formatter(result) + + def register_formatter(self, formatter, *args, **kwargs): + self._formatters.append(formatter) + self._formatters_by_format[formatter.format].append( + (formatter, args, kwargs)) + for mimetype in formatter.mimetypes: + self._formatters_by_mimetype[mimetype].append( + (formatter, args, kwargs)) + + def get_formatter(self, format=None, mimetype=None): + if format is None and mimetype is None: + raise TypeError( + "get_formatter expects one of the 'format' or 'mimetype' " + "kwargs to be set") + + if format is not None: + try: + # the first added will be the most specific + formatter_cls, args, kwargs = ( + self._formatters_by_format[format][0]) + except IndexError: + raise FormatterNotFound( + "Formatter for format '%s' not found!" % format) + elif mimetype is not None: + try: + # the first added will be the most specific + formatter_cls, args, kwargs = ( + self._formatters_by_mimetype[mimetype][0]) + except IndexError: + raise FormatterNotFound( + "Formatter for mimetype '%s' not found!" % mimetype) + + formatter = formatter_cls(request_mimetype=mimetype) + formatter.configure(*args, **kwargs) + return formatter + + @property + def accept_mimetypes(self): + return [m for f in self._formatters for m in f.mimetypes] + + def best_mimetype(self): + raise NotImplementedError( + "best_mimetype() should be implemented in " + "framework-specific subclasses of Negotiator" + ) + + def _abort(self, status_code, err=None): + raise NotImplementedError( + "_abort() should be implemented in framework-specific " + "subclasses of Negotiator" + ) + + +def negotiate(negotiator_cls, formatter_cls, *args, **kwargs): + def _negotiate(f, *args, **kwargs): + return f.negotiator(*args, **kwargs) + + def decorate(f): + if not hasattr(f, 'negotiator'): + f.negotiator = negotiator_cls(f) + + f.negotiator.register_formatter(formatter_cls, *args, **kwargs) + return decorator(_negotiate, f) + + return decorate diff --git a/swh/core/serializers.py b/swh/core/api/serializers.py similarity index 100% rename from swh/core/serializers.py rename to swh/core/api/serializers.py diff --git a/swh/core/api_async.py b/swh/core/api_async.py index eed981e..9502582 100644 --- a/swh/core/api_async.py +++ b/swh/core/api_async.py @@ -1,56 +1 @@ -import aiohttp.web -import asyncio -import json -import logging -import multidict -import pickle -import sys -import traceback - -from .serializers import msgpack_dumps, msgpack_loads, SWHJSONDecoder - - -def encode_data_server(data, **kwargs): - return aiohttp.web.Response( - body=msgpack_dumps(data), - headers=multidict.MultiDict({'Content-Type': 'application/x-msgpack'}), - **kwargs - ) - - -@asyncio.coroutine -def decode_request(request): - content_type = request.headers.get('Content-Type') - data = yield from request.read() - - if content_type == 'application/x-msgpack': - r = msgpack_loads(data) - elif content_type == 'application/json': - r = json.loads(data, cls=SWHJSONDecoder) - else: - raise ValueError('Wrong content type `%s` for API request' - % content_type) - return r - - -@asyncio.coroutine -def error_middleware(app, handler): - @asyncio.coroutine - def middleware_handler(request): - try: - return (yield from handler(request)) - except Exception as e: - if isinstance(e, aiohttp.web.HTTPException): - raise - logging.exception(e) - exception = traceback.format_exception(*sys.exc_info()) - res = {'exception': exception, - 'exception_pickled': pickle.dumps(e)} - return encode_data_server(res, status=500) - return middleware_handler - - -class SWHRemoteAPI(aiohttp.web.Application): - def __init__(self, *args, middlewares=(), **kwargs): - middlewares = (error_middleware,) + middlewares - super().__init__(*args, middlewares=middlewares, **kwargs) +from swh.core.api.asynchronous import * # noqa, for bw compat diff --git a/swh/core/config.py b/swh/core/config.py index 940adb4..ac0ac4d 100644 --- a/swh/core/config.py +++ b/swh/core/config.py @@ -1,281 +1,357 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import configparser import logging import os import yaml +from itertools import chain +from copy import deepcopy + + +logger = logging.getLogger(__name__) SWH_CONFIG_DIRECTORIES = [ '~/.config/swh', '~/.swh', '/etc/softwareheritage', ] SWH_GLOBAL_CONFIG = 'global.ini' SWH_DEFAULT_GLOBAL_CONFIG = { 'content_size_limit': ('int', 100 * 1024 * 1024), 'log_db': ('str', 'dbname=softwareheritage-log'), } SWH_CONFIG_EXTENSIONS = [ '.yml', '.ini', ] # conversion per type _map_convert_fn = { 'int': int, 'bool': lambda x: x.lower() == 'true', 'list[str]': lambda x: [value.strip() for value in x.split(',')], 'list[int]': lambda x: [int(value.strip()) for value in x.split(',')], } _map_check_fn = { 'int': lambda x: isinstance(x, int), 'bool': lambda x: isinstance(x, bool), 'list[str]': lambda x: (isinstance(x, list) and all(isinstance(y, str) for y in x)), 'list[int]': lambda x: (isinstance(x, list) and all(isinstance(y, int) for y in x)), } def exists_accessible(file): """Check whether a file exists, and is accessible. Returns: True if the file exists and is accessible False if the file does not exist Raises: PermissionError if the file cannot be read. """ try: os.stat(file) except PermissionError: raise except FileNotFoundError: return False else: if os.access(file, os.R_OK): return True else: raise PermissionError("Permission denied: %r" % file) def config_basepath(config_path): """Return the base path of a configuration file""" if config_path.endswith(('.ini', '.yml')): return config_path[:-4] return config_path def read_raw_config(base_config_path): """Read the raw config corresponding to base_config_path. Can read yml or ini files. """ - yml_file = base_config_path + '.yml' if exists_accessible(yml_file): - logging.debug('Using config file %s', yml_file) + logger.info('Loading config file %s', yml_file) with open(yml_file) as f: return yaml.safe_load(f) ini_file = base_config_path + '.ini' if exists_accessible(ini_file): config = configparser.ConfigParser() config.read(ini_file) if 'main' in config._sections: - logging.debug('Using config file %s', ini_file) + logger.info('Loading config file %s', ini_file) return config._sections['main'] else: - logging.debug('Ignoring config file %s (no [main])', ini_file) + logger.warning('Ignoring config file %s (no [main] section)', + ini_file) return {} def config_exists(config_path): """Check whether the given config exists""" basepath = config_basepath(config_path) return any(exists_accessible(basepath + extension) for extension in SWH_CONFIG_EXTENSIONS) def read(conf_file=None, default_conf=None): """Read the user's configuration file. Fill in the gap using `default_conf`. `default_conf` is similar to this:: DEFAULT_CONF = { 'a': ('str', '/tmp/swh-loader-git/log'), 'b': ('str', 'dbname=swhloadergit') 'c': ('bool', true) 'e': ('bool', None) 'd': ('int', 10) } If conf_file is None, return the default config. """ conf = {} if conf_file: base_config_path = config_basepath(os.path.expanduser(conf_file)) conf = read_raw_config(base_config_path) if not default_conf: default_conf = {} # remaining missing default configuration key are set # also type conversion is enforced for underneath layer for key in default_conf: nature_type, default_value = default_conf[key] val = conf.get(key, None) if val is None: # fallback to default value conf[key] = default_value elif not _map_check_fn.get(nature_type, lambda x: True)(val): # value present but not in the proper format, force type conversion conf[key] = _map_convert_fn.get(nature_type, lambda x: x)(val) return conf def priority_read(conf_filenames, default_conf=None): """Try reading the configuration files from conf_filenames, in order, and return the configuration from the first one that exists. default_conf has the same specification as it does in read. """ # Try all the files in order for filename in conf_filenames: full_filename = os.path.expanduser(filename) if config_exists(full_filename): return read(full_filename, default_conf) # Else, return the default configuration return read(None, default_conf) def merge_default_configs(base_config, *other_configs): """Merge several default config dictionaries, from left to right""" full_config = base_config.copy() for config in other_configs: full_config.update(config) return full_config +def merge_configs(base, other): + """Merge two config dictionaries + + This does merge config dicts recursively, with the rules, for every value + of the dicts (with 'val' not being a dict): + + - None + type -> type + - type + None -> None + - dict + dict -> dict (merged) + - val + dict -> TypeError + - dict + val -> TypeError + - val + val -> val (other) + + so merging + + { + 'key1': { + 'skey1': value1, + 'skey2': {'sskey1': value2}, + }, + 'key2': value3, + } + + with + + { + 'key1': { + 'skey1': value4, + 'skey2': {'sskey2': value5}, + }, + 'key3': value6, + } + + will give: + + { + 'key1': { + 'skey1': value4, # <-- note this + 'skey2': { + 'sskey1': value2, + 'sskey2': value5, + }, + }, + 'key2': value3, + 'key3': value6, + } + + Note that no type checking is done for anything but dicts. + """ + if not isinstance(base, dict) or not isinstance(other, dict): + raise TypeError( + 'Cannot merge a %s with a %s' % (type(base), type(other))) + + output = {} + allkeys = set(chain(base.keys(), other.keys())) + for k in allkeys: + vb = base.get(k) + vo = other.get(k) + + if isinstance(vo, dict): + output[k] = merge_configs(vb is not None and vb or {}, vo) + elif isinstance(vb, dict) and k in other and other[k] is not None: + output[k] = merge_configs(vb, vo is not None and vo or {}) + elif k in other: + output[k] = deepcopy(vo) + else: + output[k] = deepcopy(vb) + + return output + + def swh_config_paths(base_filename): """Return the Software Heritage specific configuration paths for the given filename.""" return [os.path.join(dirname, base_filename) for dirname in SWH_CONFIG_DIRECTORIES] def prepare_folders(conf, *keys): """Prepare the folder mentioned in config under keys. """ def makedir(folder): if not os.path.exists(folder): os.makedirs(folder) for key in keys: makedir(conf[key]) def load_global_config(): """Load the global Software Heritage config""" return priority_read( swh_config_paths(SWH_GLOBAL_CONFIG), SWH_DEFAULT_GLOBAL_CONFIG, ) def load_named_config(name, default_conf=None, global_conf=True): """Load the config named `name` from the Software Heritage configuration paths. If global_conf is True (default), read the global configuration too. """ conf = {} if global_conf: conf.update(load_global_config()) conf.update(priority_read(swh_config_paths(name), default_conf)) return conf class SWHConfig: """Mixin to add configuration parsing abilities to classes The class should override the class attributes: - DEFAULT_CONFIG (default configuration to be parsed) - CONFIG_BASE_FILENAME (the filename of the configuration to be used) This class defines one classmethod, parse_config_file, which parses a configuration file using the default config as set in the class attribute. """ DEFAULT_CONFIG = {} CONFIG_BASE_FILENAME = '' @classmethod def parse_config_file(cls, base_filename=None, config_filename=None, additional_configs=None, global_config=True): """Parse the configuration file associated to the current class. By default, parse_config_file will load the configuration cls.CONFIG_BASE_FILENAME from one of the Software Heritage configuration directories, in order, unless it is overridden by base_filename or config_filename (which shortcuts the file lookup completely). Args: - base_filename (str) overrides the default cls.CONFIG_BASE_FILENAME - config_filename (str) sets the file to parse instead of the defaults set from cls.CONFIG_BASE_FILENAME - additional_configs (list of default configuration dicts) allows to override or extend the configuration set in cls.DEFAULT_CONFIG. - global_config (bool): Load the global configuration (default: True) """ if config_filename: config_filenames = [config_filename] else: if not base_filename: base_filename = cls.CONFIG_BASE_FILENAME config_filenames = swh_config_paths(base_filename) if not additional_configs: additional_configs = [] full_default_config = merge_default_configs(cls.DEFAULT_CONFIG, *additional_configs) config = {} if global_config: config = load_global_config() config.update(priority_read(config_filenames, full_default_config)) return config diff --git a/swh/core/db/__init__.py b/swh/core/db/__init__.py new file mode 100644 index 0000000..cab7ddb --- /dev/null +++ b/swh/core/db/__init__.py @@ -0,0 +1,193 @@ +# Copyright (C) 2015-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import binascii +import datetime +import enum +import json +import os +import threading + +from contextlib import contextmanager + +import psycopg2 +import psycopg2.extras + + +psycopg2.extras.register_uuid() + + +def escape(data): + if data is None: + return '' + if isinstance(data, bytes): + return '\\x%s' % binascii.hexlify(data).decode('ascii') + elif isinstance(data, str): + return '"%s"' % data.replace('"', '""') + elif isinstance(data, datetime.datetime): + # We escape twice to make sure the string generated by + # isoformat gets escaped + return escape(data.isoformat()) + elif isinstance(data, dict): + return escape(json.dumps(data)) + elif isinstance(data, list): + return escape("{%s}" % ','.join(escape(d) for d in data)) + elif isinstance(data, psycopg2.extras.Range): + # We escape twice here too, so that we make sure + # everything gets passed to copy properly + return escape( + '%s%s,%s%s' % ( + '[' if data.lower_inc else '(', + '-infinity' if data.lower_inf else escape(data.lower), + 'infinity' if data.upper_inf else escape(data.upper), + ']' if data.upper_inc else ')', + ) + ) + elif isinstance(data, enum.IntEnum): + return escape(int(data)) + else: + # We don't escape here to make sure we pass literals properly + return str(data) + + +def typecast_bytea(value, cur): + if value is not None: + data = psycopg2.BINARY(value, cur) + return data.tobytes() + + +class BaseDb: + """Base class for swh.*.*Db. + + cf. swh.storage.db.Db, swh.archiver.db.ArchiverDb + + """ + + @classmethod + def adapt_conn(cls, conn): + """Makes psycopg2 use 'bytes' to decode bytea instead of + 'memoryview', for this connection.""" + cur = conn.cursor() + cur.execute("SELECT null::bytea, null::bytea[]") + bytea_oid = cur.description[0][1] + bytea_array_oid = cur.description[1][1] + + t_bytes = psycopg2.extensions.new_type( + (bytea_oid,), "bytea", typecast_bytea) + psycopg2.extensions.register_type(t_bytes, conn) + + t_bytes_array = psycopg2.extensions.new_array_type( + (bytea_array_oid,), "bytea[]", t_bytes) + psycopg2.extensions.register_type(t_bytes_array, conn) + + @classmethod + def connect(cls, *args, **kwargs): + """factory method to create a DB proxy + + Accepts all arguments of psycopg2.connect; only some specific + possibilities are reported below. + + Args: + connstring: libpq2 connection string + + """ + conn = psycopg2.connect(*args, **kwargs) + cls.adapt_conn(conn) + return cls(conn) + + @classmethod + def from_pool(cls, pool): + conn = pool.getconn() + cls.adapt_conn(conn) + return cls(conn, pool=pool) + + def __init__(self, conn, pool=None): + """create a DB proxy + + Args: + conn: psycopg2 connection to the SWH DB + pool: psycopg2 pool of connections + + """ + self.conn = conn + self.pool = pool + + def __del__(self): + if self.pool: + self.pool.putconn(self.conn) + + def cursor(self, cur_arg=None): + """get a cursor: from cur_arg if given, or a fresh one otherwise + + meant to avoid boilerplate if/then/else in methods that proxy stored + procedures + + """ + if cur_arg is not None: + return cur_arg + else: + return self.conn.cursor() + _cursor = cursor # for bw compat + + @contextmanager + def transaction(self): + """context manager to execute within a DB transaction + + Yields: + a psycopg2 cursor + + """ + with self.conn.cursor() as cur: + try: + yield cur + self.conn.commit() + except Exception: + if not self.conn.closed: + self.conn.rollback() + raise + + def copy_to(self, items, tblname, columns, + cur=None, item_cb=None, default_values={}): + """Copy items' entries to table tblname with columns information. + + Args: + items (dict): dictionary of data to copy over tblname. + tblname (str): destination table's name. + columns ([str]): keys to access data in items and also the + column names in the destination table. + default_values (dict): dictionnary of default values to use when + inserting entried int the tblname table. + cur: a db cursor; if not given, a new cursor will be created. + item_cb (fn): optional function to apply to items's entry. + """ + + read_file, write_file = os.pipe() + + def writer(): + cursor = self.cursor(cur) + with open(read_file, 'r') as f: + cursor.copy_expert('COPY %s (%s) FROM STDIN CSV' % ( + tblname, ', '.join(columns)), f) + + write_thread = threading.Thread(target=writer) + write_thread.start() + + try: + with open(write_file, 'w') as f: + for d in items: + if item_cb is not None: + item_cb(d) + line = [escape(d.get(k, default_values.get(k))) + for k in columns] + f.write(','.join(line)) + f.write('\n') + finally: + # No problem bubbling up exceptions, but we still need to make sure + # we finish copying, even though we're probably going to cancel the + # transaction. + write_thread.join() + + def mktemp(self, tblname, cur=None): + self.cursor(cur).execute('SELECT swh_mktemp(%s)', (tblname,)) diff --git a/swh/core/db/common.py b/swh/core/db/common.py new file mode 100644 index 0000000..a09ad63 --- /dev/null +++ b/swh/core/db/common.py @@ -0,0 +1,80 @@ +# Copyright (C) 2015-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import inspect +import functools + + +def apply_options(cursor, options): + """Applies the given postgresql client options to the given cursor. + + Returns a dictionary with the old values if they changed.""" + old_options = {} + for option, value in options.items(): + cursor.execute('SHOW %s' % option) + old_value = cursor.fetchall()[0][0] + if old_value != value: + cursor.execute('SET LOCAL %s TO %%s' % option, (value,)) + old_options[option] = old_value + return old_options + + +def db_transaction(**client_options): + """decorator to execute Backend methods within DB transactions + + The decorated method must accept a `cur` and `db` keyword argument + + Client options are passed as `set` options to the postgresql server + """ + def decorator(meth, __client_options=client_options): + if inspect.isgeneratorfunction(meth): + raise ValueError( + 'Use db_transaction_generator for generator functions.') + + @functools.wraps(meth) + def _meth(self, *args, **kwargs): + if 'cur' in kwargs and kwargs['cur']: + cur = kwargs['cur'] + old_options = apply_options(cur, __client_options) + ret = meth(self, *args, **kwargs) + apply_options(cur, old_options) + return ret + else: + db = self.get_db() + with db.transaction() as cur: + apply_options(cur, __client_options) + return meth(self, *args, db=db, cur=cur, **kwargs) + return _meth + + return decorator + + +def db_transaction_generator(**client_options): + """decorator to execute Backend methods within DB transactions, while + returning a generator + + The decorated method must accept a `cur` and `db` keyword argument + + Client options are passed as `set` options to the postgresql server + """ + def decorator(meth, __client_options=client_options): + if not inspect.isgeneratorfunction(meth): + raise ValueError( + 'Use db_transaction for non-generator functions.') + + @functools.wraps(meth) + def _meth(self, *args, **kwargs): + if 'cur' in kwargs and kwargs['cur']: + cur = kwargs['cur'] + old_options = apply_options(cur, __client_options) + yield from meth(self, *args, **kwargs) + apply_options(cur, old_options) + else: + db = self.get_db() + with db.transaction() as cur: + apply_options(cur, __client_options) + yield from meth(self, *args, db=db, cur=cur, **kwargs) + return _meth + return decorator diff --git a/swh/core/db/db_utils.py b/swh/core/db/db_utils.py new file mode 100644 index 0000000..451fb58 --- /dev/null +++ b/swh/core/db/db_utils.py @@ -0,0 +1,149 @@ +# Copyright (C) 2015-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# +# This code has been imported from psycopg2, version 2.7.4, +# https://github.com/psycopg/psycopg2/tree/5afb2ce803debea9533e293eef73c92ffce95bcd +# and modified by Software Heritage. +# +# Original file: lib/extras.py +# +# psycopg2 is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. + + +import re +import functools + +import psycopg2.extensions + + +def stored_procedure(stored_proc): + """decorator to execute remote stored procedure, specified as argument + + Generally, the body of the decorated function should be empty. If it is + not, the stored procedure will be executed first; the function body then. + + """ + def wrap(meth): + @functools.wraps(meth) + def _meth(self, *args, **kwargs): + cur = kwargs.get('cur', None) + self._cursor(cur).execute('SELECT %s()' % stored_proc) + meth(self, *args, **kwargs) + return _meth + return wrap + + +def jsonize(value): + """Convert a value to a psycopg2 JSON object if necessary""" + if isinstance(value, dict): + return psycopg2.extras.Json(value) + + return value + + +def _paginate(seq, page_size): + """Consume an iterable and return it in chunks. + Every chunk is at most `page_size`. Never return an empty chunk. + """ + page = [] + it = iter(seq) + while 1: + try: + for i in range(page_size): + page.append(next(it)) + yield page + page = [] + except StopIteration: + if page: + yield page + return + + +def _split_sql(sql): + """Split *sql* on a single ``%s`` placeholder. + Split on the %s, perform %% replacement and return pre, post lists of + snippets. + """ + curr = pre = [] + post = [] + tokens = re.split(br'(%.)', sql) + for token in tokens: + if len(token) != 2 or token[:1] != b'%': + curr.append(token) + continue + + if token[1:] == b's': + if curr is pre: + curr = post + else: + raise ValueError( + "the query contains more than one '%s' placeholder") + elif token[1:] == b'%': + curr.append(b'%') + else: + raise ValueError("unsupported format character: '%s'" + % token[1:].decode('ascii', 'replace')) + + if curr is pre: + raise ValueError("the query doesn't contain any '%s' placeholder") + + return pre, post + + +def execute_values_generator(cur, sql, argslist, template=None, page_size=100): + '''Execute a statement using SQL ``VALUES`` with a sequence of parameters. + Rows returned by the query are returned through a generator. + You need to consume the generator for the queries to be executed! + + :param cur: the cursor to use to execute the query. + :param sql: the query to execute. It must contain a single ``%s`` + placeholder, which will be replaced by a `VALUES list`__. + Example: ``"INSERT INTO mytable (id, f1, f2) VALUES %s"``. + :param argslist: sequence of sequences or dictionaries with the arguments + to send to the query. The type and content must be consistent with + *template*. + :param template: the snippet to merge to every item in *argslist* to + compose the query. + + - If the *argslist* items are sequences it should contain positional + placeholders (e.g. ``"(%s, %s, %s)"``, or ``"(%s, %s, 42)``" if there + are constants value...). + - If the *argslist* items are mappings it should contain named + placeholders (e.g. ``"(%(id)s, %(f1)s, 42)"``). + + If not specified, assume the arguments are sequence and use a simple + positional template (i.e. ``(%s, %s, ...)``), with the number of + placeholders sniffed by the first element in *argslist*. + :param page_size: maximum number of *argslist* items to include in every + statement. If there are more items the function will execute more than + one statement. + :param yield_from_cur: Whether to yield results from the cursor in this + function directly. + + .. __: https://www.postgresql.org/docs/current/static/queries-values.html + + After the execution of the function the `cursor.rowcount` property will + **not** contain a total result. + ''' + # we can't just use sql % vals because vals is bytes: if sql is bytes + # there will be some decoding error because of stupid codec used, and Py3 + # doesn't implement % on bytes. + if not isinstance(sql, bytes): + sql = sql.encode( + psycopg2.extensions.encodings[cur.connection.encoding] + ) + pre, post = _split_sql(sql) + + for page in _paginate(argslist, page_size=page_size): + if template is None: + template = b'(' + b','.join([b'%s'] * len(page[0])) + b')' + parts = pre[:] + for args in page: + parts.append(cur.mogrify(template, args)) + parts.append(b',') + parts[-1:] = post + cur.execute(b''.join(parts)) + yield from cur diff --git a/swh/core/logger.py b/swh/core/logger.py index 4198503..74b9dd5 100644 --- a/swh/core/logger.py +++ b/swh/core/logger.py @@ -1,192 +1,192 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import logging import os import socket import psycopg2 from psycopg2.extras import Json from systemd.journal import JournalHandler as _JournalHandler, send try: from celery import current_task except ImportError: current_task = None EXTRA_LOGDATA_PREFIX = 'swh_' def db_level_of_py_level(lvl): """convert a log level of the logging module to a log level suitable for the logging Postgres DB """ return logging.getLevelName(lvl).lower() def get_extra_data(record, task_args=True): """Get the extra data to insert to the database from the logging record""" log_data = record.__dict__ extra_data = {k[len(EXTRA_LOGDATA_PREFIX):]: v for k, v in log_data.items() if k.startswith(EXTRA_LOGDATA_PREFIX)} args = log_data.get('args') if args: extra_data['logging_args'] = args # Retrieve Celery task info if current_task and current_task.request: extra_data['task'] = { 'id': current_task.request.id, 'name': current_task.name, } if task_args: extra_data['task'].update({ 'kwargs': current_task.request.kwargs, 'args': current_task.request.args, }) return extra_data def flatten(data, separator='_'): """Flatten the data dictionary into a flat structure""" def inner_flatten(data, prefix): if isinstance(data, dict): for key, value in data.items(): yield from inner_flatten(value, prefix + [key]) elif isinstance(data, (list, tuple)): for key, value in enumerate(data): yield from inner_flatten(value, prefix + [str(key)]) else: yield prefix, data for path, value in inner_flatten(data, []): yield separator.join(path), value def stringify(value): """Convert value to string""" if isinstance(value, datetime.datetime): return value.isoformat() return str(value) class PostgresHandler(logging.Handler): """log handler that store messages in a Postgres DB See swh-core/swh/core/sql/log-schema.sql for the DB schema. All logging methods can be used as usual. Additionally, arbitrary metadata can be passed to logging methods, requesting that they will be stored in the DB as a single JSONB value. To do so, pass a dictionary to the 'extra' kwarg of any logging method; all keys in that dictionary that start with EXTRA_LOGDATA_PREFIX (currently: ``swh_``) will be extracted to form the JSONB dictionary. The prefix will be stripped and not included in the DB. Note: the logger name will be used to fill the 'module' DB column. Sample usage:: logging.basicConfig(level=logging.INFO) h = PostgresHandler('dbname=softwareheritage-log') logging.getLogger().addHandler(h) logger.info('not so important notice', extra={'swh_type': 'swh_logging_test', 'swh_meditation': 'guru'}) logger.warn('something weird just happened, did you see that?') """ def __init__(self, connstring): """ Create a Postgres log handler. Args: config: configuration dictionary, with a key "log_db" containing a libpq connection string to the log DB """ super().__init__() self.connstring = connstring self.fqdn = socket.getfqdn() # cache FQDN value def _connect(self): return psycopg2.connect(self.connstring) def emit(self, record): msg = self.format(record) extra_data = get_extra_data(record) if 'task' in extra_data: task_args = { 'args': extra_data['task']['args'], 'kwargs': extra_data['task']['kwargs'], } try: json_args = Json(task_args).getquoted() except TypeError: - task_args = { - 'args': [''], - 'kwargs': {}, - } + task_args = { + 'args': [''], + 'kwargs': {}, + } else: json_args_length = len(json_args) if json_args_length >= 1000: task_args = { 'args': [''], 'kwargs': {}, } extra_data['task'].update(task_args) log_entry = (db_level_of_py_level(record.levelno), msg, Json(extra_data), record.name, self.fqdn, os.getpid()) db = self._connect() with db.cursor() as cur: cur.execute('INSERT INTO log ' '(level, message, data, src_module, src_host, src_pid)' 'VALUES (%s, %s, %s, %s, %s, %s)', log_entry) db.commit() db.close() class JournalHandler(_JournalHandler): def emit(self, record): """Write `record` as a journal event. MESSAGE is taken from the message provided by the user, and PRIORITY, LOGGER, THREAD_NAME, CODE_{FILE,LINE,FUNC} fields are appended automatically. In addition, record.MESSAGE_ID will be used if present. """ try: extra_data = flatten(get_extra_data(record, task_args=False)) extra_data = { (EXTRA_LOGDATA_PREFIX + key).upper(): stringify(value) for key, value in extra_data } msg = self.format(record) pri = self.mapPriority(record.levelno) send(msg, PRIORITY=format(pri), LOGGER=record.name, THREAD_NAME=record.threadName, CODE_FILE=record.pathname, CODE_LINE=record.lineno, CODE_FUNC=record.funcName, **extra_data) except Exception: self.handleError(record) diff --git a/swh/core/tests/test_config.py b/swh/core/tests/test_config.py index b0461b4..8e5bbf8 100644 --- a/swh/core/tests/test_config.py +++ b/swh/core/tests/test_config.py @@ -1,210 +1,312 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil -import tempfile -import unittest +import pytest +import pkg_resources.extern.packaging.version from swh.core import config - -class ConfReaderTest(unittest.TestCase): - - @classmethod - def setUpClass(cls): - # create a temporary folder - cls.tmpdir = tempfile.mkdtemp(prefix='test-swh-core.') - cls.conffile = os.path.join(cls.tmpdir, 'config.ini') - conf_contents = """[main] +pytest_v = pkg_resources.get_distribution("pytest").parsed_version +if pytest_v < pkg_resources.extern.packaging.version.parse('3.9'): + @pytest.fixture + def tmp_path(request): + import tempfile + import pathlib + with tempfile.TemporaryDirectory() as tmpdir: + yield pathlib.Path(tmpdir) + + +default_conf = { + 'a': ('int', 2), + 'b': ('string', 'default-string'), + 'c': ('bool', True), + 'd': ('int', 10), + 'e': ('int', None), + 'f': ('bool', None), + 'g': ('string', None), + 'h': ('bool', True), + 'i': ('bool', True), + 'ls': ('list[str]', ['a', 'b', 'c']), + 'li': ('list[int]', [42, 43]), +} + +other_default_conf = { + 'a': ('int', 3), +} + +full_default_conf = default_conf.copy() +full_default_conf['a'] = other_default_conf['a'] + +parsed_default_conf = { + key: value + for key, (type, value) + in default_conf.items() +} + +parsed_conffile = { + 'a': 1, + 'b': 'this is a string', + 'c': True, + 'd': 10, + 'e': None, + 'f': None, + 'g': None, + 'h': False, + 'i': True, + 'ls': ['list', 'of', 'strings'], + 'li': [1, 2, 3, 4], +} + + +@pytest.fixture +def swh_config(tmp_path): + # create a temporary folder + conffile = tmp_path / 'config.ini' + conf_contents = """[main] a = 1 b = this is a string c = true h = false ls = list, of, strings li = 1, 2, 3, 4 """ - with open(cls.conffile, 'w') as conf: - conf.write(conf_contents) - - cls.non_existing_conffile = os.path.join(cls.tmpdir, - 'config-nonexisting.ini') - - # Create an unreadable, proper configuration file - cls.perms_broken_file = os.path.join(cls.tmpdir, 'unreadable.ini') - with open(cls.perms_broken_file, 'w') as conf: - conf.write(conf_contents) - os.chmod(cls.perms_broken_file, 0o000) - - # Create a proper configuration file in an unreadable directory - cls.perms_broken_dir = os.path.join(cls.tmpdir, 'unreadabledir') - cls.file_in_broken_dir = os.path.join(cls.perms_broken_dir, - 'unreadable.ini') - os.makedirs(cls.perms_broken_dir) - with open(cls.file_in_broken_dir, 'w') as conf: - conf.write(conf_contents) - os.chmod(cls.perms_broken_dir, 0o000) - - cls.empty_conffile = os.path.join(cls.tmpdir, 'empty.ini') - open(cls.empty_conffile, 'w').close() - - cls.default_conf = { - 'a': ('int', 2), - 'b': ('string', 'default-string'), - 'c': ('bool', True), - 'd': ('int', 10), - 'e': ('int', None), - 'f': ('bool', None), - 'g': ('string', None), - 'h': ('bool', True), - 'i': ('bool', True), - 'ls': ('list[str]', ['a', 'b', 'c']), - 'li': ('list[int]', [42, 43]), - } - - cls.other_default_conf = { - 'a': ('int', 3), - } - - cls.full_default_conf = cls.default_conf.copy() - cls.full_default_conf['a'] = cls.other_default_conf['a'] - - cls.parsed_default_conf = { - key: value - for key, (type, value) - in cls.default_conf.items() - } - - cls.parsed_conffile = { - 'a': 1, - 'b': 'this is a string', - 'c': True, - 'd': 10, - 'e': None, - 'f': None, - 'g': None, - 'h': False, - 'i': True, - 'ls': ['list', 'of', 'strings'], - 'li': [1, 2, 3, 4], - } - - @classmethod - def tearDownClass(cls): - # Make the broken perms items readable again to be able to remove them - os.chmod(cls.perms_broken_dir, 0o755) - os.chmod(cls.perms_broken_file, 0o644) - shutil.rmtree(cls.tmpdir) - - def test_read(self): - # when - res = config.read(self.conffile, self.default_conf) - - # then - self.assertEqual(res, self.parsed_conffile) - - def test_read_empty_file(self): - # when - res = config.read(None, self.default_conf) - - # then - self.assertEqual(res, self.parsed_default_conf) - - def test_support_non_existing_conffile(self): - # when - res = config.read(self.non_existing_conffile, self.default_conf) - - # then - self.assertEqual(res, self.parsed_default_conf) - - def test_support_empty_conffile(self): - # when - res = config.read(self.empty_conffile, self.default_conf) - - # then - self.assertEqual(res, self.parsed_default_conf) - - def test_raise_on_broken_directory_perms(self): - with self.assertRaises(PermissionError): - config.read(self.file_in_broken_dir, self.default_conf) - - def test_raise_on_broken_file_perms(self): - with self.assertRaises(PermissionError): - config.read(self.perms_broken_file, self.default_conf) - - def test_merge_default_configs(self): - # when - res = config.merge_default_configs(self.default_conf, - self.other_default_conf) - - # then - self.assertEqual(res, self.full_default_conf) - - def test_priority_read_nonexist_conf(self): - # when - res = config.priority_read([self.non_existing_conffile, self.conffile], - self.default_conf) - - # then - self.assertEqual(res, self.parsed_conffile) - - def test_priority_read_conf_nonexist_empty(self): - # when - res = config.priority_read([ - self.conffile, - self.non_existing_conffile, - self.empty_conffile, - ], self.default_conf) - - # then - self.assertEqual(res, self.parsed_conffile) - - def test_priority_read_empty_conf_nonexist(self): - # when - res = config.priority_read([ - self.empty_conffile, - self.conffile, - self.non_existing_conffile, - ], self.default_conf) - - # then - self.assertEqual(res, self.parsed_default_conf) - - def test_swh_config_paths(self): - res = config.swh_config_paths('foo/bar.ini') - - self.assertEqual(res, [ - '~/.config/swh/foo/bar.ini', - '~/.swh/foo/bar.ini', - '/etc/softwareheritage/foo/bar.ini', - ]) - - def test_prepare_folder(self): - # given - conf = {'path1': os.path.join(self.tmpdir, 'path1'), - 'path2': os.path.join(self.tmpdir, 'path2', 'depth1')} - - # the folders does not exists - self.assertFalse(os.path.exists(conf['path1']), - "path1 should not exist.") - self.assertFalse(os.path.exists(conf['path2']), - "path2 should not exist.") - - # when - config.prepare_folders(conf, 'path1') - - # path1 exists but not path2 - self.assertTrue(os.path.exists(conf['path1']), - "path1 should now exist!") - self.assertFalse(os.path.exists(conf['path2']), - "path2 should not exist.") - - # path1 already exists, skips it but creates path2 - config.prepare_folders(conf, 'path1', 'path2') - - self.assertTrue(os.path.exists(conf['path1']), - "path1 should still exist!") - self.assertTrue(os.path.exists(conf['path2']), - "path2 should now exist.") + conffile.open('w').write(conf_contents) + return conffile + + +@pytest.fixture +def swh_config_unreadable(swh_config): + # Create an unreadable, proper configuration file + os.chmod(str(swh_config), 0o000) + yield swh_config + # Make the broken perms file readable again to be able to remove them + os.chmod(str(swh_config), 0o644) + + +@pytest.fixture +def swh_config_unreadable_dir(swh_config): + # Create a proper configuration file in an unreadable directory + perms_broken_dir = swh_config.parent / 'unreadabledir' + perms_broken_dir.mkdir() + shutil.move(str(swh_config), str(perms_broken_dir)) + os.chmod(str(perms_broken_dir), 0o000) + yield perms_broken_dir / swh_config.name + # Make the broken perms items readable again to be able to remove them + os.chmod(str(perms_broken_dir), 0o755) + + +@pytest.fixture +def swh_config_empty(tmp_path): + # create a temporary folder + conffile = tmp_path / 'config.ini' + conffile.touch() + return conffile + + +def test_read(swh_config): + # when + res = config.read(str(swh_config), default_conf) + + # then + assert res == parsed_conffile + + +def test_read_empty_file(): + # when + res = config.read(None, default_conf) + + # then + assert res == parsed_default_conf + + +def test_support_non_existing_conffile(tmp_path): + # when + res = config.read(str(tmp_path / 'void.ini'), default_conf) + + # then + assert res == parsed_default_conf + + +def test_support_empty_conffile(swh_config_empty): + # when + res = config.read(str(swh_config_empty), default_conf) + + # then + assert res == parsed_default_conf + + +def test_raise_on_broken_directory_perms(swh_config_unreadable_dir): + with pytest.raises(PermissionError): + config.read(str(swh_config_unreadable_dir), default_conf) + + +def test_raise_on_broken_file_perms(swh_config_unreadable): + with pytest.raises(PermissionError): + config.read(str(swh_config_unreadable), default_conf) + + +def test_merge_default_configs(): + # when + res = config.merge_default_configs(default_conf, other_default_conf) + + # then + assert res == full_default_conf + + +def test_priority_read_nonexist_conf(swh_config): + noexist = str(swh_config.parent / 'void.ini') + # when + res = config.priority_read([noexist, str(swh_config)], default_conf) + + # then + assert res == parsed_conffile + + +def test_priority_read_conf_nonexist_empty(swh_config): + noexist = swh_config.parent / 'void.ini' + empty = swh_config.parent / 'empty.ini' + empty.touch() + + # when + res = config.priority_read([str(p) for p in ( + swh_config, noexist, empty)], default_conf) + + # then + assert res == parsed_conffile + + +def test_priority_read_empty_conf_nonexist(swh_config): + noexist = swh_config.parent / 'void.ini' + empty = swh_config.parent / 'empty.ini' + empty.touch() + + # when + res = config.priority_read([str(p) for p in ( + empty, swh_config, noexist)], default_conf) + + # then + assert res == parsed_default_conf + + +def test_swh_config_paths(): + res = config.swh_config_paths('foo/bar.ini') + + assert res == [ + '~/.config/swh/foo/bar.ini', + '~/.swh/foo/bar.ini', + '/etc/softwareheritage/foo/bar.ini', + ] + + +def test_prepare_folder(tmp_path): + # given + conf = {'path1': str(tmp_path / 'path1'), + 'path2': str(tmp_path / 'path2' / 'depth1')} + + # the folders does not exists + assert not os.path.exists(conf['path1']), "path1 should not exist." + assert not os.path.exists(conf['path2']), "path2 should not exist." + + # when + config.prepare_folders(conf, 'path1') + + # path1 exists but not path2 + assert os.path.exists(conf['path1']), "path1 should now exist!" + assert not os.path.exists(conf['path2']), "path2 should not exist." + + # path1 already exists, skips it but creates path2 + config.prepare_folders(conf, 'path1', 'path2') + + assert os.path.exists(conf['path1']), "path1 should still exist!" + assert os.path.exists(conf['path2']), "path2 should now exist." + + +def test_merge_config(): + cfg_a = { + 'a': 42, + 'b': [1, 2, 3], + 'c': None, + 'd': {'gheez': 27}, + 'e': { + 'ea': 'Mr. Bungle', + 'eb': None, + 'ec': [11, 12, 13], + 'ed': {'eda': 'Secret Chief 3', + 'edb': 'Faith No More'}, + 'ee': 451, + }, + 'f': 'Janis', + } + cfg_b = { + 'a': 43, + 'b': [41, 42, 43], + 'c': 'Tom Waits', + 'd': None, + 'e': { + 'ea': 'Igorrr', + 'ec': [51, 52], + 'ed': {'edb': 'Sleepytime Gorilla Museum', + 'edc': 'Nils Peter Molvaer'}, + }, + 'g': 'Hüsker Dü', + } + + # merge A, B + cfg_m = config.merge_configs(cfg_a, cfg_b) + assert cfg_m == { + 'a': 43, # b takes precedence + 'b': [41, 42, 43], # b takes precedence + 'c': 'Tom Waits', # b takes precedence + 'd': None, # b['d'] takes precedence (explicit None) + 'e': { + 'ea': 'Igorrr', # a takes precedence + 'eb': None, # only in a + 'ec': [51, 52], # b takes precedence + 'ed': { + 'eda': 'Secret Chief 3', # only in a + 'edb': 'Sleepytime Gorilla Museum', # b takes precedence + 'edc': 'Nils Peter Molvaer'}, # only defined in b + 'ee': 451, + }, + 'f': 'Janis', # only defined in a + 'g': 'Hüsker Dü', # only defined in b + } + + # merge B, A + cfg_m = config.merge_configs(cfg_b, cfg_a) + assert cfg_m == { + 'a': 42, # a takes precedence + 'b': [1, 2, 3], # a takes precedence + 'c': None, # a takes precedence + 'd': {'gheez': 27}, # a takes precedence + 'e': { + 'ea': 'Mr. Bungle', # a takes precedence + 'eb': None, # only defined in a + 'ec': [11, 12, 13], # a takes precedence + 'ed': { + 'eda': 'Secret Chief 3', # only in a + 'edb': 'Faith No More', # a takes precedence + 'edc': 'Nils Peter Molvaer'}, # only in b + 'ee': 451, + }, + 'f': 'Janis', # only in a + 'g': 'Hüsker Dü', # only in b + } + + +def test_merge_config_type_error(): + for v in (1, 'str', None): + with pytest.raises(TypeError): + config.merge_configs(v, {}) + with pytest.raises(TypeError): + config.merge_configs({}, v) + + for v in (1, 'str'): + with pytest.raises(TypeError): + config.merge_configs({'a': v}, {'a': {}}) + with pytest.raises(TypeError): + config.merge_configs({'a': {}}, {'a': v}) diff --git a/swh/core/tests/test_logger.py b/swh/core/tests/test_logger.py index ba4c9f6..240aac1 100644 --- a/swh/core/tests/test_logger.py +++ b/swh/core/tests/test_logger.py @@ -1,45 +1,50 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import os -import unittest import pytest from swh.core.logger import PostgresHandler -from swh.core.tests.db_testing import SingleDbTestFixture from swh.core.tests import SQL_DIR +DUMP_FILE = os.path.join(SQL_DIR, 'log-schema.sql') -@pytest.mark.db -class PgLogHandler(SingleDbTestFixture, unittest.TestCase): - TEST_DB_DUMP = os.path.join(SQL_DIR, 'log-schema.sql') +@pytest.fixture +def swh_db_logger(postgresql_proc, postgresql): - def setUp(self): - super().setUp() - self.modname = 'swh.core.tests.test_logger' - self.logger = logging.Logger(self.modname, logging.DEBUG) - self.logger.addHandler(PostgresHandler('dbname=' + self.TEST_DB_NAME)) + cursor = postgresql.cursor() + with open(DUMP_FILE) as fobj: + cursor.execute(fobj.read()) + postgresql.commit() + modname = 'swh.core.tests.test_logger' + logger = logging.Logger(modname, logging.DEBUG) + dsn = 'postgresql://{user}@{host}:{port}/{dbname}'.format( + host=postgresql_proc.host, + port=postgresql_proc.port, + user='postgres', + dbname='tests') + logger.addHandler(PostgresHandler(dsn)) + return logger - def tearDown(self): - logging.shutdown() - super().tearDown() - def test_log(self): - self.logger.info('notice', - extra={'swh_type': 'test entry', 'swh_data': 42}) - self.logger.warning('warning') +def test_log(swh_db_logger, postgresql): + logger = swh_db_logger + modname = logger.name - with self.conn.cursor() as cur: - cur.execute('SELECT level, message, data, src_module FROM log') - db_log_entries = cur.fetchall() + logger.info('notice', + extra={'swh_type': 'test entry', 'swh_data': 42}) + logger.warning('warning') - self.assertIn(('info', 'notice', {'type': 'test entry', 'data': 42}, - self.modname), - db_log_entries) - self.assertIn(('warning', 'warning', {}, self.modname), db_log_entries) + with postgresql.cursor() as cur: + cur.execute('SELECT level, message, data, src_module FROM log') + db_log_entries = cur.fetchall() + + assert ('info', 'notice', {'type': 'test entry', 'data': 42}, + modname) in db_log_entries + assert ('warning', 'warning', {}, modname) in db_log_entries diff --git a/swh/core/tests/test_serializers.py b/swh/core/tests/test_serializers.py index f9e80e9..40aec9a 100644 --- a/swh/core/tests/test_serializers.py +++ b/swh/core/tests/test_serializers.py @@ -1,81 +1,81 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import json import unittest from uuid import UUID import arrow -from swh.core.serializers import ( +from swh.core.api.serializers import ( SWHJSONDecoder, SWHJSONEncoder, msgpack_dumps, msgpack_loads ) class Serializers(unittest.TestCase): def setUp(self): self.tz = datetime.timezone(datetime.timedelta(minutes=118)) self.data = { 'bytes': b'123456789\x99\xaf\xff\x00\x12', 'datetime_naive': datetime.datetime(2015, 1, 1, 12, 4, 42, 231455), 'datetime_tz': datetime.datetime(2015, 3, 4, 18, 25, 13, 1234, tzinfo=self.tz), 'datetime_utc': datetime.datetime(2015, 3, 4, 18, 25, 13, 1234, tzinfo=datetime.timezone.utc), 'datetime_delta': datetime.timedelta(64), 'arrow_date': arrow.get('2018-04-25T16:17:53.533672+00:00'), 'swhtype': 'fake', 'swh_dict': {'swhtype': 42, 'd': 'test'}, 'random_dict': {'swhtype': 43}, 'uuid': UUID('cdd8f804-9db6-40c3-93ab-5955d3836234'), } self.encoded_data = { 'bytes': {'swhtype': 'bytes', 'd': 'F)}kWH8wXmIhn8j01^'}, 'datetime_naive': {'swhtype': 'datetime', 'd': '2015-01-01T12:04:42.231455'}, 'datetime_tz': {'swhtype': 'datetime', 'd': '2015-03-04T18:25:13.001234+01:58'}, 'datetime_utc': {'swhtype': 'datetime', 'd': '2015-03-04T18:25:13.001234+00:00'}, 'datetime_delta': {'swhtype': 'timedelta', 'd': {'days': 64, 'seconds': 0, 'microseconds': 0}}, 'arrow_date': {'swhtype': 'arrow', 'd': '2018-04-25T16:17:53.533672+00:00'}, 'swhtype': 'fake', 'swh_dict': {'swhtype': 42, 'd': 'test'}, 'random_dict': {'swhtype': 43}, 'uuid': {'swhtype': 'uuid', 'd': 'cdd8f804-9db6-40c3-93ab-5955d3836234'}, } self.generator = (i for i in range(5)) self.gen_lst = list(range(5)) def test_round_trip_json(self): data = json.dumps(self.data, cls=SWHJSONEncoder) self.assertEqual(self.data, json.loads(data, cls=SWHJSONDecoder)) def test_encode_swh_json(self): data = json.dumps(self.data, cls=SWHJSONEncoder) self.assertEqual(self.encoded_data, json.loads(data)) def test_round_trip_msgpack(self): data = msgpack_dumps(self.data) self.assertEqual(self.data, msgpack_loads(data)) def test_generator_json(self): data = json.dumps(self.generator, cls=SWHJSONEncoder) self.assertEqual(self.gen_lst, json.loads(data, cls=SWHJSONDecoder)) def test_generator_msgpack(self): data = msgpack_dumps(self.generator) self.assertEqual(self.gen_lst, msgpack_loads(data)) diff --git a/version.txt b/version.txt index 2cde1ec..d871e33 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.50-0-g24f9d5e \ No newline at end of file +v0.0.51-0-gcc65e0f \ No newline at end of file