diff --git a/swh/core/cli/db.py b/swh/core/cli/db.py
index b3f8482..51c3d81 100755
--- a/swh/core/cli/db.py
+++ b/swh/core/cli/db.py
@@ -1,209 +1,262 @@
 #!/usr/bin/env python3
 # Copyright (C) 2018-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import logging
 from os import environ, path
-from typing import Tuple
+from typing import Collection, Tuple
 import warnings
 
 import click
 
 from swh.core.cli import CONTEXT_SETTINGS
+from swh.core.cli import swh as swh_cli_group
 
 warnings.filterwarnings("ignore")  # noqa prevent psycopg from telling us sh*t
 
 
 logger = logging.getLogger(__name__)
 
 
-@click.group(name="db", context_settings=CONTEXT_SETTINGS)
+@swh_cli_group.group(name="db", context_settings=CONTEXT_SETTINGS)
 @click.option(
     "--config-file",
     "-C",
     default=None,
     type=click.Path(exists=True, dir_okay=False),
     help="Configuration file.",
 )
 @click.pass_context
 def db(ctx, config_file):
-    """Software Heritage database generic tools.
-    """
+    """Software Heritage database generic tools."""
     from swh.core.config import read as config_read
 
     ctx.ensure_object(dict)
     if config_file is None:
         config_file = environ.get("SWH_CONFIG_FILENAME")
     cfg = config_read(config_file)
     ctx.obj["config"] = cfg
 
 
-@db.command(name="init", context_settings=CONTEXT_SETTINGS)
-@click.pass_context
-def init(ctx):
-    """Initialize the database for every Software Heritage module found in the
-    configuration file. For every configuration section in the config file
-    that:
+@db.command(name="create", context_settings=CONTEXT_SETTINGS)
+@click.argument("module", required=True)
+@click.option(
+    "--db-name",
+    "-d",
+    help="Database name.",
+    default="softwareheritage-dev",
+    show_default=True,
+)
+@click.option(
+    "--template",
+    "-T",
+    help="Template database from which to build this database.",
+    default="template1",
+    show_default=True,
+)
+def db_create(module, db_name, template):
+    """Create a database for the Software Heritage <module>.
 
-    1. has the name of an existing swh package,
-    2. has credentials for a local db access,
+    It also executes the superuser-level initialization steps, if any.
 
-    it will run the initialization scripts from the swh package against the
-    given database.
+    Example:
+
+      swh db create -d swh-test storage
 
-    Example for the config file::
+    If you want to specify non-default postgresql connection parameters, please
+    provide them using standard environment variables or by means of a
+    properly crafted libpq connection URI. See psql(1) man page (section
+    ENVIRONMENTS) for details.
 
-    \b
-      storage:
-        cls: local
-        args:
-          db: postgresql:///?service=swh-storage
-      objstorage:
-        cls: remote
-        args:
-          url: http://swh-objstorage:5003/
+    Note: this command requires a postgresql connection with superuser permissions.
 
-    the command:
+    Example:
 
-      swh db -C /path/to/config.yml init
+      PGPORT=5434 swh db create indexer
+      swh db create -d postgresql://superuser:passwd@pghost:5433/swh-storage storage
 
-    will initialize the database for the `storage` section using initialization
-    scripts from the `swh.storage` package.
     """
-    for modname, cfg in ctx.obj["config"].items():
-        if cfg.get("cls") == "local" and cfg.get("args", {}).get("db"):
-            try:
-                initialized, dbversion = populate_database_for_package(
-                    modname, cfg["args"]["db"]
-                )
-            except click.BadParameter:
-                logger.info(
-                    "Failed to load/find sql initialization files for %s", modname
-                )
-
-            click.secho(
-                "DONE database for {} {} at version {}".format(
-                    modname, "initialized" if initialized else "exists", dbversion
-                ),
-                fg="green",
-                bold=True,
-            )
+    logger.debug("db_create %s db_name=%s", module, db_name)
+    create_database_for_package(module, db_name, template)
 
-@click.command(context_settings=CONTEXT_SETTINGS)
+
+@db.command(name="init", context_settings=CONTEXT_SETTINGS)
 @click.argument("module", required=True)
 @click.option(
     "--db-name",
     "-d",
     help="Database name.",
     default="softwareheritage-dev",
     show_default=True,
 )
-@click.option(
-    "--create-db/--no-create-db",
-    "-C",
-    help="Attempt to create the database.",
-    default=False,
-)
-def db_init(module, db_name, create_db):
-    """Initialize a database for the Software Heritage <module>. By
-    default, does not attempt to create the database.
+def db_init(module, db_name):
+    """Initialize a database for the Software Heritage <module>.
 
     Example:
 
-      swh db-init -d swh-test storage
+      swh db init -d swh-test storage
 
     If you want to specify non-default postgresql connection parameters,
     please provide them using standard environment variables.
     See psql(1) man page (section ENVIRONMENTS) for details.
 
     Example:
 
-      PGPORT=5434 swh db-init indexer
+      PGPORT=5434 swh db init indexer
+      swh db init -d postgresql://user:passwd@pghost:5433/swh-storage storage
 
     """
 
-    logger.debug("db_init %s dn_name=%s", module, db_name)
+    logger.debug("db_init %s db_name=%s", module, db_name)
 
-    if create_db:
-        from swh.core.db.tests.db_testing import pg_createdb
-
-        # Create the db (or fail silently if already existing)
-        pg_createdb(db_name, check=False)
-
     initialized, dbversion = populate_database_for_package(module, db_name)
 
     # TODO: Ideally migrate the version from db_version to the latest
     # db version
 
     click.secho(
         "DONE database for {} {} at version {}".format(
             module, "initialized" if initialized else "exists", dbversion
         ),
         fg="green",
         bold=True,
     )
 
 
 def get_sql_for_package(modname):
     import glob
     from importlib import import_module
 
     from swh.core.utils import numfile_sortkey as sortkey
 
     if not modname.startswith("swh."):
         modname = "swh.{}".format(modname)
     try:
         m = import_module(modname)
     except ImportError:
         raise click.BadParameter("Unable to load module {}".format(modname))
 
     sqldir = path.join(path.dirname(m.__file__), "sql")
     if not path.isdir(sqldir):
         raise click.BadParameter(
             "Module {} does not provide a db schema " "(no sql/ dir)".format(modname)
         )
-    return list(sorted(glob.glob(path.join(sqldir, "*.sql")), key=sortkey))
+    return sorted(glob.glob(path.join(sqldir, "*.sql")), key=sortkey)
 
 
 def populate_database_for_package(modname: str, conninfo: str) -> Tuple[bool, int]:
     """Populate the database, pointed at with `conninfo`, using the SQL files found in
     the package `modname`.
 
     Args:
       modname: Name of the module of which we're loading the files
       conninfo: connection info string for the SQL database
 
     Returns:
       Tuple with two elements: whether the database has been initialized; the current
       version of the database.
     """
-    import subprocess
-
     from swh.core.db.db_utils import swh_db_version
 
     current_version = swh_db_version(conninfo)
     if current_version is not None:
         return False, current_version
 
     sqlfiles = get_sql_for_package(modname)
+    sqlfiles = [fname for fname in sqlfiles if "-superuser-" not in fname]
+    execute_sqlfiles(sqlfiles, conninfo)
+
+    current_version = swh_db_version(conninfo)
+    assert current_version is not None
+    return True, current_version
+
+
+def create_database_for_package(
+    modname: str, conninfo: str, template: str = "template1"
+):
+    """Create the database pointed at with `conninfo`, and initialize it using
+    -superuser- SQL files found in the package `modname`.
+
+    Args:
+      modname: Name of the module of which we're loading the files
+      conninfo: connection info string, or bare database name, for the SQL database
+      template: the name of the database to connect to and use as template to create
+        the new database
+
+    Raises:
+      click.BadParameter: if no database name can be found in `conninfo`
+
+    """
+    import subprocess
+
+    from psycopg2 import ProgrammingError
+    from psycopg2.extensions import make_dsn, parse_dsn
+
+    # parse_dsn() rejects a bare database name (such as the CLI default
+    # "softwareheritage-dev") although psql accepts one: handle it as such.
+    try:
+        creation_dsn = parse_dsn(conninfo)
+    except ProgrammingError:
+        creation_dsn = {"dbname": conninfo}
+    db_name = creation_dsn.get("dbname")
+    if not db_name:
+        raise click.BadParameter("No database name found in connection info")
+
+    # Use the given conninfo but with dbname replaced by the template dbname
+    # for the database creation step
+    # NOTE(review): no TEMPLATE clause is passed to CREATE DATABASE below, so
+    # `template` only selects the database psql connects to -- confirm intent.
+    creation_dsn["dbname"] = template
+    logger.debug("db_create db_name=%s (from %s)", db_name, template)
+
+    # Quote the identifier: a name such as "swh-test" is not a valid unquoted
+    # SQL identifier, and embedded double quotes have to be doubled.
+    quoted_db_name = '"{}"'.format(db_name.replace('"', '""'))
+    subprocess.check_call(
+        [
+            "psql",
+            "--quiet",
+            "--no-psqlrc",
+            "-v",
+            "ON_ERROR_STOP=1",
+            "-d",
+            make_dsn(**creation_dsn),
+            "-c",
+            f"CREATE DATABASE {quoted_db_name}",
+        ]
+    )
+
+    # the remaining initialization process -- running -superuser- SQL files --
+    # is done using the given conninfo, thus connecting to the newly created
+    # database
+    sqlfiles = get_sql_for_package(modname)
+    sqlfiles = [fname for fname in sqlfiles if "-superuser-" in fname]
+    execute_sqlfiles(sqlfiles, conninfo)
+
+
+def execute_sqlfiles(sqlfiles: Collection[str], conninfo: str):
+    """Execute a list of SQL files on the database pointed at with `conninfo`.
+
+    Args:
+      sqlfiles: List of SQL files to execute
+      conninfo: connection info string for the SQL database
+    """
+    import subprocess
 
     for sqlfile in sqlfiles:
+        logger.debug("execute SQL file %s db_name=%s", sqlfile, conninfo)
         subprocess.check_call(
             [
                 "psql",
                 "--quiet",
                 "--no-psqlrc",
                 "-v",
                 "ON_ERROR_STOP=1",
                 "-d",
                 conninfo,
                 "-f",
                 sqlfile,
             ]
         )
-
-    current_version = swh_db_version(conninfo)
-    assert current_version is not None
-    return True, current_version
diff --git a/swh/core/db/tests/test_cli.py b/swh/core/db/tests/test_cli.py
index 87b40eb..7c8520d 100644
--- a/swh/core/db/tests/test_cli.py
+++ b/swh/core/db/tests/test_cli.py
@@ -1,56 +1,57 @@
 #
 
 from click.testing import CliRunner
 
 from swh.core.cli.db import db as swhdb
 
 help_msg = """Usage: swh [OPTIONS] COMMAND [ARGS]...
 
   Command line interface for Software Heritage.
 
 Options:
   -l, --log-level [NOTSET|DEBUG|INFO|WARNING|ERROR|CRITICAL]
                                   Log level (defaults to INFO).
   --log-config FILENAME           Python yaml logging configuration file.
   --sentry-dsn TEXT               DSN of the Sentry instance to report to
   -h, --help                      Show this message and exit.
 
 Notes:
   If both options are present, --log-level will override the root logger
   configuration set in --log-config.
 
   The --log-config YAML must conform to the logging.config.dictConfig schema
   documented at https://docs.python.org/3/library/logging.config.html.
 
 Commands:
   db  Software Heritage database generic tools.
 """
 
 
 def test_swh_help(swhmain):
     swhmain.add_command(swhdb)
     runner = CliRunner()
     result = runner.invoke(swhmain, ["-h"])
     assert result.exit_code == 0
     assert result.output == help_msg
 
 
 help_db_msg = """Usage: swh db [OPTIONS] COMMAND [ARGS]...
 
   Software Heritage database generic tools.
 
 Options:
   -C, --config-file FILE  Configuration file.
   -h, --help              Show this message and exit.
 
 Commands:
-  init  Initialize the database for every Software Heritage module found in...
+  create  Create a database for the Software Heritage <module>.
+  init    Initialize a database for the Software Heritage <module>.
 """
 
 
 def test_swh_db_help(swhmain):
     swhmain.add_command(swhdb)
     runner = CliRunner()
     result = runner.invoke(swhmain, ["db", "-h"])
     assert result.exit_code == 0
     assert result.output == help_db_msg