diff --git a/PKG-INFO b/PKG-INFO index 637739a..7e35bae 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,88 +1,88 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.66 +Version: 0.0.67 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Funding, https://www.softwareheritage.org/donate Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown +Provides-Extra: http Provides-Extra: db Provides-Extra: testing -Provides-Extra: http diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index 637739a..7e35bae 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,88 +1,88 @@ Metadata-Version: 2.1 Name: swh.core -Version: 0.0.66 +Version: 0.0.67 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Funding, https://www.softwareheritage.org/donate Description: swh-core ======== core library for swh's modules: - config parser - hash computations - serialization - logging mechanism - database connection - http-based RPC client/server Development ----------- We strongly recommend you to use a [virtualenv][1] if you want to run tests or hack the code. To set up your development environment: ``` (swh) user@host:~/swh-environment/swh-core$ pip install -e .[testing] ``` This will install every Python package needed to run this package's tests. Unit tests can be executed using [pytest][2] or [tox][3]. ``` (swh) user@host:~/swh-environment/swh-core$ pytest ============================== test session starts ============================== platform linux -- Python 3.7.3, pytest-3.10.1, py-1.8.0, pluggy-0.12.0 hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/home/ddouard/src/swh-environment/swh-core/.hypothesis/examples') rootdir: /home/ddouard/src/swh-environment/swh-core, inifile: pytest.ini plugins: requests-mock-1.6.0, hypothesis-4.26.4, celery-4.3.0, postgresql-1.4.1 collected 89 items swh/core/api/tests/test_api.py .. [ 2%] swh/core/api/tests/test_async.py .... [ 6%] swh/core/api/tests/test_serializers.py ..... [ 12%] swh/core/db/tests/test_db.py .... [ 16%] swh/core/tests/test_cli.py ...... [ 23%] swh/core/tests/test_config.py .............. [ 39%] swh/core/tests/test_statsd.py ........................................... [ 87%] .... [ 92%] swh/core/tests/test_utils.py ....... [100%] ===================== 89 passed, 9 warnings in 6.94 seconds ===================== ``` Note: this git repository uses [pre-commit][4] hooks to ensure better and more consistent code. It should already be installed in your virtualenv (if not, just type `pip install pre-commit`). Make sure to activate it in your local copy of the git repository: ``` (swh) user@host:~/swh-environment/swh-core$ pre-commit install pre-commit installed at .git/hooks/pre-commit ``` Please read the [developer setup manual][5] for more information on how to hack on Software Heritage. [1]: https://virtualenv.pypa.io [2]: https://docs.pytest.org [3]: https://tox.readthedocs.io [4]: https://pre-commit.com [5]: https://docs.softwareheritage.org/devel/developer-setup.html Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown +Provides-Extra: http Provides-Extra: db Provides-Extra: testing -Provides-Extra: http diff --git a/swh/core/api/tests/server_testing.py b/swh/core/api/tests/server_testing.py index 9a708a4..f007d95 100644 --- a/swh/core/api/tests/server_testing.py +++ b/swh/core/api/tests/server_testing.py @@ -1,149 +1,144 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import abc import multiprocessing import os -import socket import time from urllib.request import urlopen import aiohttp +import aiohttp.test_utils class ServerTestFixtureBaseClass(metaclass=abc.ABCMeta): """Base class for http client/server testing implementations. Override this class to implement the following methods: - process_config: to do something needed for the server configuration (e.g propagate the configuration to other part) - define_worker_function: define the function that will actually run the server. To ensure test isolation, each test will run in a different server and a different folder. In order to correctly work, the subclass must call the parents class's setUp() and tearDown() methods. """ def setUp(self): super().setUp() self.start_server() def tearDown(self): self.stop_server() super().tearDown() def url(self): return 'http://127.0.0.1:%d/' % self.port def process_config(self): """Process the server's configuration. Do something useful for example, pass along the self.config dictionary inside the self.app. By default, do nothing. """ pass @abc.abstractmethod def define_worker_function(self, app, port): """Define how the actual implementation server will run. """ pass def start_server(self): """ Spawn the API server using multiprocessing. """ self.process = None self.process_config() - - # Get an available port number - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.bind(('127.0.0.1', 0)) - self.port = sock.getsockname()[1] - sock.close() + self.port = aiohttp.test_utils.unused_port() worker_fn = self.define_worker_function() self.process = multiprocessing.Process( target=worker_fn, args=(self.app, self.port) ) self.process.start() # Wait max 5 seconds for server to spawn i = 0 while i < 500: try: urlopen(self.url()) except Exception: i += 1 time.sleep(0.01) else: return def stop_server(self): """ Terminate the API server's process. """ if self.process: self.process.terminate() class ServerTestFixture(ServerTestFixtureBaseClass): """Base class for http client/server testing (e.g flask). Mix this in a test class in order to have access to an http server running in background. Note that the subclass should define a dictionary in self.config that contains the server config. And an application in self.app that corresponds to the type of server the tested client needs. To ensure test isolation, each test will run in a different server and a different folder. In order to correctly work, the subclass must call the parents class's setUp() and tearDown() methods. """ def process_config(self): # WSGI app configuration for key, value in self.config.items(): self.app.config[key] = value def define_worker_function(self): def worker(app, port): # Make Flask 1.0 stop printing its server banner os.environ['WERKZEUG_RUN_MAIN'] = 'true' return app.run(port=port, use_reloader=False) return worker class ServerTestFixtureAsync(ServerTestFixtureBaseClass): """Base class for http client/server async testing (e.g aiohttp). Mix this in a test class in order to have access to an http server running in background. Note that the subclass should define an application in self.app that corresponds to the type of server the tested client needs. To ensure test isolation, each test will run in a different server and a different folder. In order to correctly work, the subclass must call the parents class's setUp() and tearDown() methods. """ def define_worker_function(self): def worker(app, port): return aiohttp.web.run_app(app, port=int(port), print=lambda *_: None) return worker diff --git a/swh/core/db/__init__.py b/swh/core/db/__init__.py index 455f56d..9f46544 100644 --- a/swh/core/db/__init__.py +++ b/swh/core/db/__init__.py @@ -1,193 +1,217 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import binascii import datetime import enum import json +import logging import os +import sys import threading from contextlib import contextmanager import psycopg2 import psycopg2.extras +logger = logging.getLogger(__name__) + + psycopg2.extras.register_uuid() def escape(data): if data is None: return '' if isinstance(data, bytes): return '\\x%s' % binascii.hexlify(data).decode('ascii') elif isinstance(data, str): return '"%s"' % data.replace('"', '""') elif isinstance(data, datetime.datetime): # We escape twice to make sure the string generated by # isoformat gets escaped return escape(data.isoformat()) elif isinstance(data, dict): return escape(json.dumps(data)) elif isinstance(data, list): return escape("{%s}" % ','.join(escape(d) for d in data)) elif isinstance(data, psycopg2.extras.Range): # We escape twice here too, so that we make sure # everything gets passed to copy properly return escape( '%s%s,%s%s' % ( '[' if data.lower_inc else '(', '-infinity' if data.lower_inf else escape(data.lower), 'infinity' if data.upper_inf else escape(data.upper), ']' if data.upper_inc else ')', ) ) elif isinstance(data, enum.IntEnum): return escape(int(data)) else: # We don't escape here to make sure we pass literals properly return str(data) def typecast_bytea(value, cur): if value is not None: data = psycopg2.BINARY(value, cur) return data.tobytes() class BaseDb: """Base class for swh.*.*Db. cf. swh.storage.db.Db, swh.archiver.db.ArchiverDb """ @classmethod def adapt_conn(cls, conn): """Makes psycopg2 use 'bytes' to decode bytea instead of 'memoryview', for this connection.""" cur = conn.cursor() cur.execute("SELECT null::bytea, null::bytea[]") bytea_oid = cur.description[0][1] bytea_array_oid = cur.description[1][1] t_bytes = psycopg2.extensions.new_type( (bytea_oid,), "bytea", typecast_bytea) psycopg2.extensions.register_type(t_bytes, conn) t_bytes_array = psycopg2.extensions.new_array_type( (bytea_array_oid,), "bytea[]", t_bytes) psycopg2.extensions.register_type(t_bytes_array, conn) @classmethod def connect(cls, *args, **kwargs): """factory method to create a DB proxy Accepts all arguments of psycopg2.connect; only some specific possibilities are reported below. Args: connstring: libpq2 connection string """ conn = psycopg2.connect(*args, **kwargs) return cls(conn) @classmethod def from_pool(cls, pool): conn = pool.getconn() return cls(conn, pool=pool) def __init__(self, conn, pool=None): """create a DB proxy Args: conn: psycopg2 connection to the SWH DB pool: psycopg2 pool of connections """ self.adapt_conn(conn) self.conn = conn self.pool = pool def put_conn(self): if self.pool: self.pool.putconn(self.conn) def cursor(self, cur_arg=None): """get a cursor: from cur_arg if given, or a fresh one otherwise meant to avoid boilerplate if/then/else in methods that proxy stored procedures """ if cur_arg is not None: return cur_arg else: return self.conn.cursor() _cursor = cursor # for bw compat @contextmanager def transaction(self): """context manager to execute within a DB transaction Yields: a psycopg2 cursor """ with self.conn.cursor() as cur: try: yield cur self.conn.commit() except Exception: if not self.conn.closed: self.conn.rollback() raise def copy_to(self, items, tblname, columns, cur=None, item_cb=None, default_values={}): """Copy items' entries to table tblname with columns information. Args: items (List[dict]): dictionaries of data to copy over tblname. tblname (str): destination table's name. columns ([str]): keys to access data in items and also the column names in the destination table. default_values (dict): dictionary of default values to use when inserting entried int the tblname table. cur: a db cursor; if not given, a new cursor will be created. item_cb (fn): optional function to apply to items's entry. """ read_file, write_file = os.pipe() + exc_info = None def writer(): + nonlocal exc_info cursor = self.cursor(cur) with open(read_file, 'r') as f: - cursor.copy_expert('COPY %s (%s) FROM STDIN CSV' % ( - tblname, ', '.join(columns)), f) + try: + cursor.copy_expert('COPY %s (%s) FROM STDIN CSV' % ( + tblname, ', '.join(columns)), f) + except Exception: + # Tell the main thread about the exception + exc_info = sys.exc_info() write_thread = threading.Thread(target=writer) write_thread.start() try: with open(write_file, 'w') as f: for d in items: if item_cb is not None: item_cb(d) - line = [escape(d.get(k, default_values.get(k))) - for k in columns] + line = [] + for k in columns: + try: + value = d.get(k, default_values.get(k)) + line.append(escape(value)) + except Exception as e: + logger.error( + 'Could not escape value `%r` for column `%s`:' + 'Received exception: `%s`', + value, k, e + ) + raise e from None f.write(','.join(line)) f.write('\n') finally: # No problem bubbling up exceptions, but we still need to make sure # we finish copying, even though we're probably going to cancel the # transaction. write_thread.join() + if exc_info: + # postgresql returned an error, let's raise it. + raise exc_info[1].with_traceback(exc_info[2]) def mktemp(self, tblname, cur=None): self.cursor(cur).execute('SELECT swh_mktemp(%s)', (tblname,)) diff --git a/swh/core/db/tests/test_db.py b/swh/core/db/tests/test_db.py index 355a384..e599ed3 100644 --- a/swh/core/db/tests/test_db.py +++ b/swh/core/db/tests/test_db.py @@ -1,102 +1,110 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os.path import tempfile import unittest from hypothesis import strategies, given +import psycopg2 import pytest from swh.core.db import BaseDb from .db_testing import ( SingleDbTestFixture, db_create, db_destroy, db_close, ) INIT_SQL = ''' create table test_table ( i int, txt text, bytes bytea ); ''' db_rows = strategies.lists(strategies.tuples( strategies.integers(-2147483648, +2147483647), strategies.text( alphabet=strategies.characters( blacklist_categories=['Cs'], # surrogates blacklist_characters=[ '\x00', # pgsql does not support the null codepoint '\r', # pgsql normalizes those ] ), ), strategies.binary(), )) @pytest.mark.db def test_connect(): db_name = db_create('test-db2', dumps=[]) try: db = BaseDb.connect('dbname=%s' % db_name) with db.cursor() as cur: cur.execute(INIT_SQL) cur.execute("insert into test_table values (1, %s, %s);", ('foo', b'bar')) cur.execute("select * from test_table;") assert list(cur) == [(1, 'foo', b'bar')] finally: db_close(db.conn) db_destroy(db_name) @pytest.mark.db class TestDb(SingleDbTestFixture, unittest.TestCase): TEST_DB_NAME = 'test-db' @classmethod def setUpClass(cls): with tempfile.TemporaryDirectory() as td: with open(os.path.join(td, 'init.sql'), 'a') as fd: fd.write(INIT_SQL) cls.TEST_DB_DUMP = os.path.join(td, '*.sql') super().setUpClass() def setUp(self): super().setUp() self.db = BaseDb(self.conn) def test_initialized(self): cur = self.db.cursor() cur.execute("insert into test_table values (1, %s, %s);", ('foo', b'bar')) cur.execute("select * from test_table;") self.assertEqual(list(cur), [(1, 'foo', b'bar')]) def test_reset_tables(self): cur = self.db.cursor() cur.execute("insert into test_table values (1, %s, %s);", ('foo', b'bar')) self.reset_db_tables('test-db') cur.execute("select * from test_table;") self.assertEqual(list(cur), []) @given(db_rows) def test_copy_to(self, data): # the table is not reset between runs by hypothesis self.reset_db_tables('test-db') items = [dict(zip(['i', 'txt', 'bytes'], item)) for item in data] self.db.copy_to(items, 'test_table', ['i', 'txt', 'bytes']) cur = self.db.cursor() cur.execute('select * from test_table;') self.assertCountEqual(list(cur), data) + + def test_copy_to_thread_exception(self): + data = [(2**65, 'foo', b'bar')] + + items = [dict(zip(['i', 'txt', 'bytes'], item)) for item in data] + with self.assertRaises(psycopg2.errors.NumericValueOutOfRange): + self.db.copy_to(items, 'test_table', ['i', 'txt', 'bytes']) diff --git a/version.txt b/version.txt index 9396342..16db69b 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.66-0-ge3a59f4 \ No newline at end of file +v0.0.67-0-g9c720a1 \ No newline at end of file