diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,4 @@
 include requirements.txt
 include requirements-swh.txt
 include version.txt
+recursive-include swh/core/sql *.sql
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,9 @@
     extras_require={'testing': parse_requirements('test')},
     vcversioner={},
     include_package_data=True,
+    entry_points={
+        'console_scripts': ['swh-db-init=swh.core.cli:db_init'],
+    },
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
diff --git a/swh/core/cli.py b/swh/core/cli.py
new file mode 100755
--- /dev/null
+++ b/swh/core/cli.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import warnings
+warnings.filterwarnings("ignore")  # noqa prevent psycopg from telling us sh*t
+
+from os import path
+import glob
+
+import click
+from importlib import import_module
+
+from swh.core.utils import numfile_sortkey as sortkey
+from swh.core.tests.db_testing import pg_createdb, pg_restore, DB_DUMP_TYPES
+
+
+@click.command()
+@click.argument('module', nargs=-1, required=True)
+@click.option('--db-name', '-d', help='Database name.',
+              default='softwareheritage-dev', show_default=True)
+@click.option('--no-create', '-C', is_flag=True, default=False,
+              help='Do not attempt to create the database')
+def db_init(module, db_name=None, no_create=None):
+    """Create and initialise a database for the Software Heritage .
+
+    Example:
+
+      swh-db-init storage -d swh-test
+
+    If you want to specify non-default postgresql connection parameters,
+    please provide them using standard environment variables.
+    See psql(1) man page (section ENVIRONMENTS) for details.
+
+    Example:
+
+      PGPORT=5434 swh-db-init indexer -d swh-indexer
+
+    """
+
+    dump_files = []
+
+    for modname in module:
+        # Accept both the short ('storage') and the full ('swh.storage') name.
+        if not modname.startswith('swh.'):
+            modname = 'swh.{}'.format(modname)
+        try:
+            m = import_module(modname)
+        except ImportError:
+            raise click.BadParameter(
+                'Unable to load module {}'.format(modname))
+
+        # Schema files are looked up in the sql/ directory next to the module.
+        sqldir = path.join(path.dirname(m.__file__), 'sql')
+        if not path.isdir(sqldir):
+            raise click.BadParameter(
+                'Module {} does not provide a db schema '
+                '(no sql/ dir)'.format(modname))
+        dump_files.extend(sorted(glob.glob(path.join(sqldir, '*.sql')),
+                                 key=sortkey))
+    if not no_create:
+        pg_createdb(db_name)
+
+    dump_files = [(x, DB_DUMP_TYPES[path.splitext(x)[1]])
+                  for x in dump_files]
+    for dump, dtype in dump_files:
+        click.secho('Loading {}'.format(dump), fg='yellow')
+        pg_restore(db_name, dump, dtype)
+
+    click.secho('DONE database is {}'.format(db_name), fg='green', bold=True)
diff --git a/swh/core/logger.py b/swh/core/logger.py
--- a/swh/core/logger.py
+++ b/swh/core/logger.py
@@ -83,7 +83,7 @@
 class PostgresHandler(logging.Handler):
     """log handler that store messages in a Postgres DB
 
-    See swh-core/sql/log-schema.sql for the DB schema.
+    See swh-core/swh/core/sql/log-schema.sql for the DB schema.
 
     All logging methods can be used as usual. Additionally, arbitrary metadata
     can be passed to logging methods, requesting that they will be stored in
diff --git a/sql/log-schema.sql b/swh/core/sql/log-schema.sql
rename from sql/log-schema.sql
rename to swh/core/sql/log-schema.sql
diff --git a/swh/core/tests/__init__.py b/swh/core/tests/__init__.py
--- a/swh/core/tests/__init__.py
+++ b/swh/core/tests/__init__.py
@@ -0,0 +1,6 @@
+from os import path
+import swh.core
+
+
+# sql/ directory located next to the swh.core package sources.
+SQL_DIR = path.join(path.dirname(swh.core.__file__), 'sql')
diff --git a/swh/core/tests/db_testing.py b/swh/core/tests/db_testing.py
--- a/swh/core/tests/db_testing.py
+++ b/swh/core/tests/db_testing.py
@@ -4,9 +4,14 @@
 # See top-level LICENSE file for more information
 
 import os
+import glob
 import psycopg2
 import subprocess
 
+from swh.core.utils import numfile_sortkey as sortkey
+
+DB_DUMP_TYPES = {'.sql': 'psql', '.dump': 'pg_dump'}
+
 
 def pg_restore(dbname, dumpfile, dumptype='pg_dump'):
     """
@@ -40,8 +45,10 @@
     subprocess.check_call(['createdb', dbname])
 
 
-def db_create(dbname, dump=None, dumptype='pg_dump'):
-    """create the test DB and load the test data dump into it
+def db_create(dbname, dumps=None):
+    """create the test DB and load the test data dumps into it
+
+    dumps is an iterable of couples (dump_file, dump_type).
+
     context: setUpClass
 
@@ -51,8 +58,8 @@
     except subprocess.CalledProcessError:  # try recovering once, in case
         pg_dropdb(dbname)                  # the db already existed
         pg_createdb(dbname)
-    if dump:
-        pg_restore(dbname, dump, dumptype)
+    for dump, dtype in dumps or ():  # dumps defaults to None
+        pg_restore(dbname, dump, dtype)
     return dbname
 
 
@@ -104,16 +111,13 @@
 
 
 class DbTestContext:
-    def __init__(self, name='softwareheritage-test', dump=None,
-                 dump_type='pg_dump'):
+    def __init__(self, name='softwareheritage-test', dumps=None):
         self.dbname = name
-        self.dump = dump
-        self.dump_type = dump_type
+        self.dumps = dumps
 
     def __enter__(self):
         db_create(dbname=self.dbname,
-                  dump=self.dump,
-                  dumptype=self.dump_type)
+                  dumps=self.dumps)
         return self
 
     def __exit__(self, *_):
@@ -174,14 +178,13 @@
     DB_TEST_FIXTURE_IMPORTED = True
 
     @classmethod
-    def add_db(cls, name='softwareheritage-test', dump=None,
-               dump_type='pg_dump'):
-        cls._DB_DUMP_LIST[name] = (dump, dump_type)
+    def add_db(cls, name='softwareheritage-test', dumps=None):
+        cls._DB_DUMP_LIST[name] = dumps
 
     @classmethod
     def setUpClass(cls):
-        for name, (dump, dump_type) in cls._DB_DUMP_LIST.items():
-            cls._DB_LIST[name] = DbTestContext(name, dump, dump_type)
+        for name, dumps in cls._DB_DUMP_LIST.items():
+            cls._DB_LIST[name] = DbTestContext(name, dumps)
             cls._DB_LIST[name].__enter__()
         super().setUpClass()
 
@@ -232,9 +235,13 @@
     The class can override the following class attributes:
         TEST_DB_NAME: name of the DB used for testing
         TEST_DB_DUMP: DB dump to be restored before running test methods; can
-            be set to None if no restore from dump is required
-        TEST_DB_DUMP_TYPE: one of 'pg_dump' (binary dump) or 'psql' (SQL dump);
-            if unset, will be guessed from the TEST_DB_DUMP file name
+            be set to None if no restore from dump is required.
+            If the dump file name ends with:
+            - '.sql' it will be loaded via psql,
+            - '.dump' it will be loaded via pg_restore.
+            Other file extensions will be ignored.
+            Can be a string or a list of strings; each path will be expanded
+            using glob pattern matching.
 
     The test case class will then have the following attributes, accessible via
     self:
@@ -246,18 +253,28 @@
 
     TEST_DB_NAME = 'softwareheritage-test'
     TEST_DB_DUMP = None
-    TEST_DB_DUMP_TYPE = None
-    DB_DUMP_TYPES = {'.sql': 'psql', '.dump': 'pg_dump'}
 
     @classmethod
     def setUpClass(cls):
-        cls.dbname = cls.TEST_DB_NAME
-        dump_type = (cls.TEST_DB_DUMP_TYPE or
-                     cls.DB_DUMP_TYPES[os.path.splitext(cls.TEST_DB_DUMP)[-1]])
+        cls.dbname = cls.TEST_DB_NAME  # XXX to kill?
+
+        # TEST_DB_DUMP may be None (nothing to restore), a single glob
+        # pattern or a list of glob patterns.
+        dump_files = cls.TEST_DB_DUMP or []
+        if isinstance(dump_files, str):
+            dump_files = [dump_files]
+        all_dump_files = []
+        for files in dump_files:
+            all_dump_files.extend(
+                sorted(glob.glob(files), key=sortkey))
+
+        # Skip files whose extension has no known loader, as documented.
+        all_dump_files = [(x, DB_DUMP_TYPES[os.path.splitext(x)[1]])
+                          for x in all_dump_files
+                          if os.path.splitext(x)[1] in DB_DUMP_TYPES]
 
         cls.add_db(name=cls.TEST_DB_NAME,
-                   dump=cls.TEST_DB_DUMP,
-                   dump_type=dump_type)
+                   dumps=all_dump_files)
         super().setUpClass()
 
     def setUp(self):
diff --git a/swh/core/tests/test_logger.py b/swh/core/tests/test_logger.py
--- a/swh/core/tests/test_logger.py
+++ b/swh/core/tests/test_logger.py
@@ -12,15 +12,13 @@
 from swh.core.logger import PostgresHandler
 from swh.core.tests.db_testing import SingleDbTestFixture
 
-TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-SQL_DIR = os.path.join(TEST_DIR, '../../../sql')
+from swh.core.tests import SQL_DIR
 
 
 @attr('db')
 class PgLogHandler(SingleDbTestFixture, unittest.TestCase):
 
     TEST_DB_DUMP = os.path.join(SQL_DIR, 'log-schema.sql')
-    TEST_DB_DUMP_TYPE = 'psql'
 
     def setUp(self):
         super().setUp()
diff --git a/swh/core/utils.py b/swh/core/utils.py
--- a/swh/core/utils.py
+++ b/swh/core/utils.py
@@ -6,6 +6,7 @@
 import os
 import itertools
 import codecs
+import re
 
 from contextlib import contextmanager
 
@@ -101,3 +102,26 @@
 
     """
     return path1.split(path0)[1]
+
+
+def numfile_sortkey(fname):
+    """Return a sort key ordering file names by their leading number.
+
+    File names are expected to be of the form:
+
+      nnxxx.ext
+
+    where nn is a number; only the base name of fname is considered, so
+    full paths (e.g. as returned by glob.glob()) can be passed as well.
+    File names without a leading number get 99 as their number.
+
+    Typically used to sort sql/nn-swh-xxx.sql files.
+
+    Returns:
+        a tuple (number, remainder of the base name)
+
+    """
+    num, rem = re.match(r'(\d*)(.*)', os.path.basename(fname)).groups()
+    # Do not use `num and int(num) or 99` here: it yields 99 for a
+    # numeric prefix equal to 0 (e.g. '00-init.sql').
+    return (int(num) if num else 99, rem)