diff --git a/swh/core/cli/db.py b/swh/core/cli/db.py
index 71de8ae..b0dee82 100755
--- a/swh/core/cli/db.py
+++ b/swh/core/cli/db.py
@@ -1,423 +1,421 @@
 #!/usr/bin/env python3
 # Copyright (C) 2018-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import logging
 from os import environ
 import warnings

 import click

 from swh.core.cli import CONTEXT_SETTINGS
 from swh.core.cli import swh as swh_cli_group

 warnings.filterwarnings("ignore")  # noqa prevent psycopg from telling us sh*t


 logger = logging.getLogger(__name__)


 @swh_cli_group.group(name="db", context_settings=CONTEXT_SETTINGS)
 @click.option(
     "--config-file",
     "-C",
     default=None,
     type=click.Path(exists=True, dir_okay=False),
     help="Configuration file.",
 )
 @click.pass_context
 def db(ctx, config_file):
     """Software Heritage database generic tools."""
     from swh.core.config import read as config_read

     ctx.ensure_object(dict)
     if config_file is None:
         config_file = environ.get("SWH_CONFIG_FILENAME")
     cfg = config_read(config_file)
     ctx.obj["config"] = cfg


 @db.command(name="create", context_settings=CONTEXT_SETTINGS)
 @click.argument("module", required=True)
 @click.option(
     "--dbname",
     "--db-name",
     "-d",
     help="Database name.",
     default="softwareheritage-dev",
     show_default=True,
 )
 @click.option(
     "--template",
     "-T",
     help="Template database from which to build this database.",
     default="template1",
     show_default=True,
 )
 def db_create(module, dbname, template):
     """Create a database for the Software Heritage .

     and potentially execute superuser-level initialization steps.

     Example::

         swh db create -d swh-test storage

     If you want to specify non-default postgresql connection parameters, please
     provide them using standard environment variables or by the mean of a
     properly crafted libpq connection URI. See psql(1) man page (section
     ENVIRONMENTS) for details.

     Note: this command requires a postgresql connection with superuser permissions.

     Example::

         PGPORT=5434 swh db create indexer
         swh db create -d postgresql://superuser:passwd@pghost:5433/swh-storage storage

     """
     from swh.core.db.db_utils import create_database_for_package

-    logger.debug("db_create %s dn_name=%s", module, dbname)
+    logger.debug("db_create %s dbname=%s", module, dbname)
     create_database_for_package(module, dbname, template)


 @db.command(name="init-admin", context_settings=CONTEXT_SETTINGS)
 @click.argument("module", required=True)
 @click.option(
     "--dbname",
     "--db-name",
     "-d",
     help="Database name.",
     default="softwareheritage-dev",
     show_default=True,
 )
 def db_init_admin(module: str, dbname: str) -> None:
     """Execute superuser-level initialization steps (e.g pg extensions, admin functions,
     ...)

     Example::

         PGPASSWORD=... swh db init-admin -d swh-test scheduler

     If you want to specify non-default postgresql connection parameters, please
     provide them using standard environment variables or by the mean of a
     properly crafted libpq connection URI. See psql(1) man page (section
     ENVIRONMENTS) for details.

     Note: this command requires a postgresql connection with superuser permissions (e.g
     postgres, swh-admin, ...)

     Example::

         PGPORT=5434 swh db init-admin scheduler
         swh db init-admin -d postgresql://superuser:passwd@pghost:5433/swh-scheduler \
           scheduler

     """
     from swh.core.db.db_utils import init_admin_extensions

     logger.debug("db_init_admin %s dbname=%s", module, dbname)
     init_admin_extensions(module, dbname)


 @db.command(name="init", context_settings=CONTEXT_SETTINGS)
 @click.argument("module", required=True)
 @click.option(
     "--dbname",
     "--db-name",
     "-d",
     help="Database name or connection URI.",
     default=None,
     show_default=False,
 )
 @click.option(
     "--flavor",
     help="Database flavor.",
     default=None,
 )
 @click.option(
     "--initial-version", help="Database initial version.", default=1, show_default=True
 )
 @click.pass_context
 def db_init(ctx, module, dbname, flavor, initial_version):
     """Initialize a database for the Software Heritage .

     The database connection string comes from the configuration file (see
     option ``--config-file`` in ``swh db --help``) in the section named after
     the MODULE argument.

     Example::

         $ cat conf.yml
         storage:
           cls: postgresql
           db: postgresql://user:passwd@pghost:5433/swh-storage
           objstorage:
             cls: memory

         $ swh db -C conf.yml init storage  # or
         $ SWH_CONFIG_FILENAME=conf.yml swh db init storage

     Note that the connection string can also be passed directly using the
     '--db-name' option, but this usage is about to be deprecated.

     """
     from swh.core.db.db_utils import (
         get_database_info,
         import_swhmodule,
         populate_database_for_package,
         swh_set_db_version,
     )

     cfg = None
     if dbname is None:
         # use the db cnx from the config file; the expected config entry is the
         # given module name
         cfg = ctx.obj["config"].get(module, {})
         dbname = get_dburl_from_config(cfg)

     if not dbname:
         raise click.BadParameter(
             "Missing the postgresql connection configuration. Either fix your "
             "configuration file or use the --dbname option."
         )

     logger.debug("db_init %s flavor=%s dbname=%s", module, flavor, dbname)

     initialized, dbversion, dbflavor = populate_database_for_package(
         module, dbname, flavor
     )

     if dbversion is None:
         if cfg is not None:
             # db version has not been populated by sql init scripts (new style),
             # let's do it; instantiate the data source to retrieve the current
             # (expected) db version
             datastore_factory = getattr(import_swhmodule(module), "get_datastore", None)
             if datastore_factory:
                 datastore = datastore_factory(**cfg)
-                try:
-                    get_current_version = datastore.get_current_version
-                except AttributeError:
+                if not hasattr(datastore, "current_version"):
                     logger.warning(
-                        "Datastore %s does not implement the "
-                        "'get_current_version()' method",
+                        "Datastore %s does not declare the "
+                        "'current_version' attribute",
                         datastore,
                     )
                 else:
-                    code_version = get_current_version()
+                    code_version = datastore.current_version
                     logger.info(
                         "Initializing database version to %s from the %s datastore",
                         code_version,
                         module,
                     )
                     swh_set_db_version(dbname, code_version, desc="DB initialization")
                     dbversion = get_database_info(dbname)[1]
         if dbversion is None:
             logger.info(
                 "Initializing database version to %s "
                 "from the command line option --initial-version",
                 initial_version,
             )
             swh_set_db_version(dbname, initial_version, desc="DB initialization")
             dbversion = get_database_info(dbname)[1]

     assert dbversion is not None

     # TODO: Ideally migrate the version from db_version to the latest
     # db version

     click.secho(
         "DONE database for {} {}{} at version {}".format(
             module,
             "initialized" if initialized else "exists",
             f" (flavor {dbflavor})" if dbflavor is not None else "",
             dbversion,
         ),
         fg="green",
         bold=True,
     )

     if flavor is not None and dbflavor != flavor:
         click.secho(
             f"WARNING requested flavor '{flavor}' != recorded flavor '{dbflavor}'",
             fg="red",
             bold=True,
         )


 @db.command(name="version", context_settings=CONTEXT_SETTINGS)
 @click.argument("module", required=True)
 @click.option(
     "--all/--no-all",
     "show_all",
     help="Show version history.",
     default=False,
     show_default=True,
 )
 @click.pass_context
 def db_version(ctx, module, show_all):
     """Print the database version for the Software Heritage.

     Example::

         swh db version -d swh-test

     """
     from swh.core.db.db_utils import get_database_info, import_swhmodule

     # use the db cnx from the config file; the expected config entry is the
     # given module name
     cfg = ctx.obj["config"].get(module, {})
     dbname = get_dburl_from_config(cfg)

     if not dbname:
         raise click.BadParameter(
             "Missing the postgresql connection configuration. Either fix your "
             "configuration file or use the --dbname option."
         )

     logger.debug("db_version dbname=%s", dbname)

     db_module, db_version, db_flavor = get_database_info(dbname)
     if db_module is None:
         click.secho(
             "WARNING the database does not have a dbmodule table.", fg="red", bold=True
         )
         db_module = module
     assert db_module == module, f"{db_module} (in the db) != {module} (given)"

     click.secho(f"module: {db_module}", fg="green", bold=True)

     if db_flavor is not None:
         click.secho(f"flavor: {db_flavor}", fg="green", bold=True)

     # instantiate the data source to retrieve the current (expected) db version
     datastore_factory = getattr(import_swhmodule(db_module), "get_datastore", None)
     if datastore_factory:
         datastore = datastore_factory(**cfg)
-        code_version = datastore.get_current_version()
+        code_version = datastore.current_version
         click.secho(
             f"current code version: {code_version}",
             fg="green" if code_version == db_version else "red",
             bold=True,
         )

     if not show_all:
         click.secho(f"version: {db_version}", fg="green", bold=True)
     else:
         from swh.core.db.db_utils import swh_db_versions

         versions = swh_db_versions(dbname)
         for version, tstamp, desc in versions:
             click.echo(f"{version} [{tstamp}] {desc}")


 @db.command(name="upgrade", context_settings=CONTEXT_SETTINGS)
 @click.argument("module", required=True)
 @click.option(
     "--to-version",
     type=int,
     help="Upgrade up to version VERSION",
     metavar="VERSION",
     default=None,
 )
 @click.option(
     "--interactive/--non-interactive",
     help="Do not ask questions (use default answer to all questions)",
     default=True,
 )
 @click.pass_context
 def db_upgrade(ctx, module, to_version, interactive):
     """Upgrade the database for given module (to a given version if specified).

     Examples::

         swh db upgrade storage
         swh db upgrade scheduler --to-version=10

     """
     from swh.core.db.db_utils import (
         get_database_info,
         import_swhmodule,
         swh_db_upgrade,
         swh_set_db_module,
     )

     # use the db cnx from the config file; the expected config entry is the
     # given module name
     cfg = ctx.obj["config"].get(module, {})
     dbname = get_dburl_from_config(cfg)

     if not dbname:
         raise click.BadParameter(
             "Missing the postgresql connection configuration. Either fix your "
             "configuration file or use the --dbname option."
         )

-    logger.debug("db_version dbname=%s", dbname)
+    logger.debug("db_upgrade dbname=%s", dbname)

     db_module, db_version, db_flavor = get_database_info(dbname)
     if db_module is None:
         click.secho(
             "Warning: the database does not have a dbmodule table.",
             fg="yellow",
             bold=True,
         )
         if interactive and not click.confirm(
             f"Write the module information ({module}) in the database?", default=True
         ):
             raise click.BadParameter("Migration aborted.")
         swh_set_db_module(dbname, module)
         db_module = module

     if db_module != module:
         raise click.BadParameter(
             f"Error: the given module ({module}) does not match the value "
             f"stored in the database ({db_module})."
         )

     # instantiate the data source to retrieve the current (expected) db version
     datastore_factory = getattr(import_swhmodule(db_module), "get_datastore", None)
     if not datastore_factory:
         raise click.UsageError(
-            "You cannot use this command on old-style datastore backend {db_module}"
+            f"You cannot use this command on old-style datastore backend {db_module}"
         )
     datastore = datastore_factory(**cfg)
-    ds_version = datastore.get_current_version()
+    ds_version = datastore.current_version
     if to_version is None:
         to_version = ds_version
     if to_version > ds_version:
         raise click.UsageError(
             f"The target version {to_version} is larger than the current version "
             f"{ds_version} of the datastore backend {db_module}"
         )

     if to_version == db_version:
         click.secho(
             f"No migration needed: the current version is {db_version}",
             fg="yellow",
         )
     else:
         new_db_version = swh_db_upgrade(dbname, module, to_version)
         click.secho(f"Migration to version {new_db_version} done", fg="green")
         if new_db_version < ds_version:
             click.secho(
                 "Warning: migration was not complete: "
                 f"the current version is {ds_version}",
                 fg="yellow",
             )


 def get_dburl_from_config(cfg):
     if cfg.get("cls") != "postgresql":
         raise click.BadParameter(
             "Configuration cls must be set to 'postgresql' for this command."
         )
     if "args" in cfg:
         # for bw compat
         cfg = cfg["args"]
     return cfg.get("db")
diff --git a/swh/core/db/tests/conftest.py b/swh/core/db/tests/conftest.py
index b1d42f4..7622311 100644
--- a/swh/core/db/tests/conftest.py
+++ b/swh/core/db/tests/conftest.py
@@ -1,67 +1,67 @@
 # Copyright (C) 2019-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import os

 from click.testing import CliRunner
 from hypothesis import HealthCheck
 import pytest

 from swh.core.db.db_utils import import_swhmodule

 os.environ["LC_ALL"] = "C.UTF-8"

 # we use getattr here to keep mypy happy regardless hypothesis version
 function_scoped_fixture_check = (
     [getattr(HealthCheck, "function_scoped_fixture")]
     if hasattr(HealthCheck, "function_scoped_fixture")
     else []
 )


 @pytest.fixture
 def cli_runner():
     return CliRunner()


 @pytest.fixture()
 def mock_import_swhmodule(mocker, datadir):
     """This bypasses the module manipulation to make import_swhmodule return a mock
     object suitable for data test files listing via get_sql_for_package.

     For a given module `test.`, return a MagicMock object with a __name__
     set to `` and __file__ pointing to `data//__init__.py`.

     The Mock object also defines a `get_datastore()` attribute on which the
-    `get_current_version()` exists and will return 42.
+    `current_version` attribute is set to 42.

     Typical usage::

       def test_xxx(cli_runner, mock_import_swhmodule):
         conninfo = craft_conninfo(test_db, "new-db")
         module_name = "test.cli"
         # the command below will use sql scripts from
         #     swh/core/db/tests/data/cli/sql/*.sql
         cli_runner.invoke(swhdb, ["init", module_name, "--dbname", conninfo])

     """
     mock = mocker.MagicMock

     def import_swhmodule_mock(modname):
         if modname.startswith("test."):
             dirname = modname.split(".", 1)[1]

             def get_datastore(*args, **kw):
-                return mock(get_current_version=lambda: 42)
+                return mock(current_version=42)

             return mock(
                 __name__=modname,
                 __file__=os.path.join(datadir, dirname, "__init__.py"),
                 get_datastore=get_datastore,
             )
         else:
             return import_swhmodule(modname)

     return mocker.patch("swh.core.db.db_utils.import_swhmodule", import_swhmodule_mock)
diff --git a/swh/core/db/tests/test_cli.py b/swh/core/db/tests/test_cli.py
index 12d9927..a29ebf2 100644
--- a/swh/core/db/tests/test_cli.py
+++ b/swh/core/db/tests/test_cli.py
@@ -1,336 +1,336 @@
 # Copyright (C) 2019-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import copy
 import os
 import traceback

 import pytest
 import yaml

 from swh.core.cli.db import db as swhdb
 from swh.core.db import BaseDb
 from swh.core.db.db_utils import import_swhmodule, swh_db_module, swh_db_version
 from swh.core.tests.test_cli import assert_section_contains


 def test_cli_swh_help(swhmain, cli_runner):
     swhmain.add_command(swhdb)
     result = cli_runner.invoke(swhmain, ["-h"])
     assert result.exit_code == 0
     assert_section_contains(
         result.output, "Commands", "db Software Heritage database generic tools."
     )


 help_db_snippets = (
     (
         "Usage",
         (
             "Usage: swh db [OPTIONS] COMMAND [ARGS]...",
             "Software Heritage database generic tools.",
         ),
     ),
     (
         "Commands",
         (
             "create Create a database for the Software Heritage .",
             "init Initialize a database for the Software Heritage .",
             "init-admin Execute superuser-level initialization steps",
         ),
     ),
 )


 def test_cli_swh_db_help(swhmain, cli_runner):
     swhmain.add_command(swhdb)
     result = cli_runner.invoke(swhmain, ["db", "-h"])
     assert result.exit_code == 0

     for section, snippets in help_db_snippets:
         for snippet in snippets:
             assert_section_contains(result.output, section, snippet)


 @pytest.fixture
 def swh_db_cli(cli_runner, monkeypatch, postgresql):
     """This initializes a cli_runner and sets the correct environment variable expected by
     the cli to run appropriately (when not specifying the --dbname flag)

     """
     db_params = postgresql.get_dsn_parameters()
     monkeypatch.setenv("PGHOST", db_params["host"])
     monkeypatch.setenv("PGUSER", db_params["user"])
     monkeypatch.setenv("PGPORT", db_params["port"])

     return cli_runner, db_params


 def craft_conninfo(test_db, dbname=None) -> str:
     """Craft conninfo string out of the test_db object. This also allows to override the
     dbname."""
     db_params = test_db.get_dsn_parameters()
     if dbname:
         params = copy.deepcopy(db_params)
         params["dbname"] = dbname
     else:
         params = db_params
     return "postgresql://{user}@{host}:{port}/{dbname}".format(**params)


 def test_cli_swh_db_create_and_init_db(cli_runner, postgresql, mock_import_swhmodule):
     """Create a db then initializing it should be ok"""
     module_name = "test.cli"

     conninfo = craft_conninfo(postgresql, "new-db")
     # This creates the db and installs the necessary admin extensions
     result = cli_runner.invoke(swhdb, ["create", module_name, "--dbname", conninfo])
     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     # This initializes the schema and data
     result = cli_runner.invoke(swhdb, ["init", module_name, "--dbname", conninfo])

     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     # the origin value in the scripts uses a hash function (which implementation wise
     # uses a function from the pgcrypt extension, installed during db creation step)
     with BaseDb.connect(conninfo).cursor() as cur:
         cur.execute("select * from origin")
         origins = cur.fetchall()
         assert len(origins) == 1


 def test_cli_swh_db_initialization_fail_without_creation_first(
     cli_runner, postgresql, mock_import_swhmodule
 ):
     """Init command on an inexisting db cannot work"""
     module_name = "test.cli"  # it's mocked here
     conninfo = craft_conninfo(postgresql, "inexisting-db")

     result = cli_runner.invoke(swhdb, ["init", module_name, "--dbname", conninfo])
     # Fails because we cannot connect to an inexisting db
     assert result.exit_code == 1, f"Unexpected output: {result.output}"


 def test_cli_swh_db_initialization_fail_without_extension(
     cli_runner, postgresql, mock_import_swhmodule
 ):
     """Init command cannot work without privileged extension.

     In this test, the schema needs privileged extension to work.

     """
     module_name = "test.cli"  # it's mocked here
     conninfo = craft_conninfo(postgresql)

     result = cli_runner.invoke(swhdb, ["init", module_name, "--dbname", conninfo])
     # Fails as the function `public.digest` is not installed, init-admin calls is needed
     # first (the next tests show such behavior)
     assert result.exit_code == 1, f"Unexpected output: {result.output}"


 def test_cli_swh_db_initialization_works_with_flags(
     cli_runner, postgresql, mock_import_swhmodule
 ):
     """Init commands with carefully crafted libpq conninfo works"""
     module_name = "test.cli"  # it's mocked here
     conninfo = craft_conninfo(postgresql)

     result = cli_runner.invoke(swhdb, ["init-admin", module_name, "--dbname", conninfo])
     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     result = cli_runner.invoke(swhdb, ["init", module_name, "--dbname", conninfo])

     assert result.exit_code == 0, f"Unexpected output: {result.output}"
     # the origin values in the scripts uses a hash function (which implementation wise
     # uses a function from the pgcrypt extension, init-admin calls installs it)
     with BaseDb.connect(postgresql.dsn).cursor() as cur:
         cur.execute("select * from origin")
         origins = cur.fetchall()
         assert len(origins) == 1


 def test_cli_swh_db_initialization_with_env(
     swh_db_cli, mock_import_swhmodule, postgresql
 ):
     """Init commands with standard environment variables works"""
     module_name = "test.cli"  # it's mocked here
     cli_runner, db_params = swh_db_cli
     result = cli_runner.invoke(
         swhdb, ["init-admin", module_name, "--dbname", db_params["dbname"]]
     )
     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     result = cli_runner.invoke(
         swhdb, ["init", module_name, "--dbname", db_params["dbname"]]
     )

     assert result.exit_code == 0, f"Unexpected output: {result.output}"
     # the origin values in the scripts uses a hash function (which implementation wise
     # uses a function from the pgcrypt extension, init-admin calls installs it)
     with BaseDb.connect(postgresql.dsn).cursor() as cur:
         cur.execute("select * from origin")
         origins = cur.fetchall()
         assert len(origins) == 1


 def test_cli_swh_db_initialization_idempotent(
     swh_db_cli, mock_import_swhmodule, postgresql
 ):
     """Multiple runs of the init commands are idempotent"""
     module_name = "test.cli"  # mocked
     cli_runner, db_params = swh_db_cli

     result = cli_runner.invoke(
         swhdb, ["init-admin", module_name, "--dbname", db_params["dbname"]]
     )
     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     result = cli_runner.invoke(
         swhdb, ["init", module_name, "--dbname", db_params["dbname"]]
     )
     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     result = cli_runner.invoke(
         swhdb, ["init-admin", module_name, "--dbname", db_params["dbname"]]
     )
     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     result = cli_runner.invoke(
         swhdb, ["init", module_name, "--dbname", db_params["dbname"]]
     )
     assert result.exit_code == 0, f"Unexpected output: {result.output}"

     # the origin values in the scripts uses a hash function (which implementation wise
     # uses a function from the pgcrypt extension, init-admin calls installs it)
     with BaseDb.connect(postgresql.dsn).cursor() as cur:
         cur.execute("select * from origin")
         origins = cur.fetchall()
         assert len(origins) == 1


 def test_cli_swh_db_create_and_init_db_new_api(
     cli_runner, postgresql, mock_import_swhmodule, mocker, tmp_path
 ):
     """Create a db then initializing it should be ok for a "new style" datastore"""
     module_name = "test.cli_new"

     conninfo = craft_conninfo(postgresql)

     # This initializes the schema and data
     cfgfile = tmp_path / "config.yml"
     cfgfile.write_text(yaml.dump({module_name: {"cls": "postgresql", "db": conninfo}}))
     result = cli_runner.invoke(swhdb, ["init-admin", module_name, "--dbname", conninfo])
     assert result.exit_code == 0, f"Unexpected output: {result.output}"
     result = cli_runner.invoke(swhdb, ["-C", cfgfile, "init", module_name])

     assert (
         result.exit_code == 0
     ), f"Unexpected output: {traceback.print_tb(result.exc_info[2])}"

     # the origin value in the scripts uses a hash function (which implementation wise
     # uses a function from the pgcrypt extension, installed during db creation step)
     with BaseDb.connect(conninfo).cursor() as cur:
         cur.execute("select * from origin")
         origins = cur.fetchall()
         assert len(origins) == 1


 def test_cli_swh_db_upgrade_new_api(cli_runner, postgresql, datadir, mocker, tmp_path):
     """Upgrade scenario for a "new style" datastore"""
     module_name = "test.cli_new"

-    # the `current_version` variable is the version that will be returned by
-    # any call to `get_current_version()` in this test session, thanks to the
-    # local mocked version of import_swhmodule() below.
+    # the `current_version` variable is the version that will be exposed by
+    # the `current_version` attribute of any datastore created in this test
+    # session, thanks to the local mocked version of import_swhmodule() below.
     current_version = 1

     # custom version of the mockup to make it easy to change the
-    # current_version returned by get_current_version()
+    # current_version exposed by the mocked datastore
     # TODO: find a better solution for this...
     def import_swhmodule_mock(modname):
         if modname.startswith("test."):
             dirname = modname.split(".", 1)[1]

             def get_datastore(cls, **kw):
-                return mocker.MagicMock(get_current_version=lambda: current_version)
+                return mocker.MagicMock(current_version=current_version)

             return mocker.MagicMock(
                 __name__=modname,
                 __file__=os.path.join(datadir, dirname, "__init__.py"),
                 name=modname,
                 get_datastore=get_datastore,
             )

         return import_swhmodule(modname)

     mocker.patch("swh.core.db.db_utils.import_swhmodule", import_swhmodule_mock)
     conninfo = craft_conninfo(postgresql)

     # This initializes the schema and data
     cfgfile = tmp_path / "config.yml"
     cfgfile.write_text(yaml.dump({module_name: {"cls": "postgresql", "db": conninfo}}))
     result = cli_runner.invoke(swhdb, ["init-admin", module_name, "--dbname", conninfo])
     assert result.exit_code == 0, f"Unexpected output: {result.output}"
     result = cli_runner.invoke(swhdb, ["-C", cfgfile, "init", module_name])

     assert (
         result.exit_code == 0
     ), f"Unexpected output: {traceback.print_tb(result.exc_info[2])}"

     assert swh_db_version(conninfo) == 1

     # the upgrade should not do anything because the datastore does advertise
     # version 1
     result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name])
     assert swh_db_version(conninfo) == 1

     # advertise current version as 3, a simple upgrade should get us there, but
     # no further
     current_version = 3
     result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name])
     assert swh_db_version(conninfo) == 3

     # an attempt to go further should not do anything
     result = cli_runner.invoke(
         swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 5]
     )
     assert swh_db_version(conninfo) == 3
     # an attempt to go lower should not do anything
     result = cli_runner.invoke(
         swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 2]
     )
     assert swh_db_version(conninfo) == 3

     # advertise current version as 6, an upgrade with --to-version 4 should
     # stick to the given version 4 and no further
     current_version = 6
     result = cli_runner.invoke(
         swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 4]
     )
     assert swh_db_version(conninfo) == 4
     assert "migration was not complete" in result.output

     # attempt to upgrade to a newer version than current code version fails
     result = cli_runner.invoke(
         swhdb,
         ["-C", cfgfile, "upgrade", module_name, "--to-version", current_version + 1],
     )
     assert result.exit_code != 0
     assert swh_db_version(conninfo) == 4

     cnx = BaseDb.connect(conninfo)
     with cnx.transaction() as cur:
         cur.execute("drop table dbmodule")
     assert swh_db_module(conninfo) is None

     # db migration should recreate the missing dbmodule table
     result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name])
     assert result.exit_code == 0
     assert "Warning: the database does not have a dbmodule table." in result.output
     assert (
         "Write the module information (test.cli_new) in the database? [Y/n]"
         in result.output
     )
     assert swh_db_module(conninfo) == module_name