diff --git a/swh/core/cli/db.py b/swh/core/cli/db.py --- a/swh/core/cli/db.py +++ b/swh/core/cli/db.py @@ -314,6 +314,90 @@ click.echo(f"{version} [{tstamp}] {desc}") +@db.command(name="upgrade", context_settings=CONTEXT_SETTINGS) +@click.argument("module", required=True) +@click.option( + "--to-version", + type=int, + help="Upgrade up to version VERSION", + metavar="VERSION", + default=None, +) +@click.pass_context +def db_upgrade(ctx, module, to_version): + """Upgrade the database for given module (to a given version if specified). + + Examples:: + + swh db upgrade storage + swg db upgrade scheduler --to-version=10 + + """ + from swh.core.db.db_utils import ( + get_database_info, + import_swhmodule, + swh_db_upgrade, + swh_set_db_module, + ) + + # use the db cnx from the config file; the expected config entry is the + # given module name + cfg = ctx.obj["config"].get(module, {}) + dbname = get_dburl_from_config(cfg) + + if not dbname: + raise click.BadParameter( + "Missing the postgresql connection configuration. Either fix your " + "configuration file or use the --dbname option." + ) + + logger.debug("db_version dbname=%s", dbname) + + db_module, db_version, db_flavor = get_database_info(dbname) + if db_module is None: + click.secho( + "Warning: the database does not have a dbmodule table.", + fg="yellow", + bold=True, + ) + if not click.confirm( + f"Write the module information ({module}) in the database?", default=True + ): + raise click.BadParameter("Migration aborted.") + swh_set_db_module(dbname, module) + db_module = module + + if db_module != module: + raise click.BadParameter( + f"Error: the given module ({module}) does not match the value " + f"stored in the database ({db_module})." + ) + + # instantiate the data source to retrieve the current (expected) db version + datastore_factory = getattr(import_swhmodule(db_module), "get_datastore", None) + if not datastore_factory: + raise click.UsageError( + "You cannot use this command on old-style datastore backend {db_module}" + ) + datastore = datastore_factory(**cfg) + ds_version = datastore.get_current_version() + if to_version is None: + to_version = ds_version + if to_version > ds_version: + raise click.UsageError( + f"The target version {to_version} is larger than the current version " + f"{ds_version} of the datastore backend {db_module}" + ) + + new_db_version = swh_db_upgrade(dbname, module, to_version) + click.secho(f"Migration to version {new_db_version} done", fg="green") + if new_db_version < ds_version: + click.secho( + f"Warning: migration was not complete: the current version is {ds_version}", + fg="yellow", + ) + + def get_dburl_from_config(cfg): if cfg.get("cls") != "postgresql": raise click.BadParameter( diff --git a/swh/core/db/db_utils.py b/swh/core/db/db_utils.py --- a/swh/core/db/db_utils.py +++ b/swh/core/db/db_utils.py @@ -150,6 +150,78 @@ return None +def swh_db_upgrade( + conninfo: str, modname: str, to_version: Optional[int] = None +) -> int: + """Upgrade the database at `conninfo` for module `modname` + + This will run migration scripts found in the `sql/upgrades` subdirectory of + the module `modname`. By default, will upgrade to the latest declared version. + + Args: + conninfo: A database connection, or a database connection info string + modname: datastore module the database stores content for + to_version: if given, update the database to this version rathe than the latest + + """ + + if to_version is None: + to_version = 99999999 + + db_module, db_version, db_flavor = get_database_info(conninfo) + if db_version is None: + raise ValueError("Unable to retrieve the current version of the database") + if db_module is None: + raise ValueError("Unable to retrieve the module of the database") + if db_module != modname: + raise ValueError( + "The stored module of the database is different than the given one" + ) + + sqlfiles = [ + fname + for fname in get_sql_for_package(modname, upgrade=True) + if db_version < int(path.splitext(path.basename(fname))[0]) <= to_version + ] + + for sqlfile in sqlfiles: + new_version = int(path.splitext(path.basename(sqlfile))[0]) + logger.info("Executing migration script {sqlfile}") + if db_version is not None and (new_version - db_version) > 1: + logger.error( + f"There are missing migration steps between {db_version} and " + f"{new_version}. It might be expected but it most unlikely is not. " + "Will stop here." + ) + return db_version + + execute_sqlfiles([sqlfile], conninfo, db_flavor) + + # check if the db version has been updated by the upgrade script + db_version = swh_db_version(conninfo) + assert db_version is not None + if db_version == new_version: + # nothing to do, upgrade script did the job + pass + elif db_version == new_version - 1: + # it has not (new style), so do it + swh_set_db_version( + conninfo, + new_version, + desc=f"Upgraded to version {new_version} using {sqlfile}", + ) + db_version = swh_db_version(conninfo) + else: + # upgrade script did it wrong + logger.error( + f"The upgrade script {sqlfile} did not update the dbversion table " + f"consistently ({db_version} vs. expected {new_version}). " + "Will stop migration here. Please check your migration scripts." + ) + return db_version + return new_version + + def swh_db_module(db_or_conninfo: Union[str, pgconnection]) -> Optional[str]: """Retrieve the swh module used to create the database. @@ -183,7 +255,9 @@ return None -def swh_set_db_module(db_or_conninfo: Union[str, pgconnection], module: str) -> None: +def swh_set_db_module( + db_or_conninfo: Union[str, pgconnection], module: str, force=False +) -> None: """Set the swh module used to create the database. Fails if the dbmodule is already set or the table does not exist. @@ -192,9 +266,25 @@ db_or_conninfo: A database connection, or a database connection info string module: the swh module to register (without the leading 'swh.') """ + update = False if module.startswith("swh."): module = module[4:] + current_module = swh_db_module(db_or_conninfo) + if current_module is not None: + if current_module == module: + logger.warning("The database module is already set to %s", module) + return + + if not force: + raise ValueError( + "The database module is already set to a value %s " + "different than given %s", + current_module, + module, + ) + # force is True + update = True try: db = connect_to_conninfo(db_or_conninfo) except psycopg2.Error: @@ -202,8 +292,16 @@ # Database not initialized return None + sqlfiles = [ + fname for fname in get_sql_for_package("swh.core.db") if "dbmodule" in fname + ] + execute_sqlfiles(sqlfiles, db_or_conninfo) + with db.cursor() as c: - query = "insert into dbmodule(dbmodule) values (%s)" + if update: + query = "update dbmodule set dbmodule = %s" + else: + query = "insert into dbmodule(dbmodule) values (%s)" c.execute(query, (module,)) db.commit() @@ -398,11 +496,18 @@ return m -def get_sql_for_package(modname): +def get_sql_for_package(modname: str, upgrade=False) -> List[str]: + """Return the (sorted) list of sql script files for the given swh module + + If upgrade is True, return the list of available migration scripts, + otherwise, return the list of initialization scripts. + """ m = import_swhmodule(modname) if m is None: raise ValueError(f"Module {modname} cannot be loaded") sqldir = path.join(path.dirname(m.__file__), "sql") + if upgrade: + sqldir += "upgrades" if not path.isdir(sqldir): raise ValueError( "Module {} does not provide a db schema " "(no sql/ dir)".format(modname) diff --git a/swh/core/db/sql/35-dbmetadata.sql b/swh/core/db/sql/35-dbversion.sql rename from swh/core/db/sql/35-dbmetadata.sql rename to swh/core/db/sql/35-dbversion.sql --- a/swh/core/db/sql/35-dbmetadata.sql +++ b/swh/core/db/sql/35-dbversion.sql @@ -16,13 +16,3 @@ comment on column dbversion.version is 'SQL schema version'; comment on column dbversion.release is 'Version deployment timestamp'; comment on column dbversion.description is 'Release description'; - --- swh module this db is storing data for -create table if not exists dbmodule ( - dbmodule text, - single_row char(1) primary key default 'x', - check (single_row = 'x') -); -comment on table dbmodule is 'Database module storage'; -comment on column dbmodule.dbmodule is 'Database (swh) module currently deployed'; -comment on column dbmodule.single_row is 'Bogus column to force the table to have a single row'; diff --git a/swh/core/db/sql/35-dbmetadata.sql b/swh/core/db/sql/36-dbmodule.sql rename from swh/core/db/sql/35-dbmetadata.sql rename to swh/core/db/sql/36-dbmodule.sql --- a/swh/core/db/sql/35-dbmetadata.sql +++ b/swh/core/db/sql/36-dbmodule.sql @@ -1,22 +1,9 @@ -- common metadata/context structures -- --- we use a 35- prefix for this to make it executed after db schema initialisation +-- we use a 3x- prefix for this to make it executed after db schema initialisation -- sql scripts, which are normally 30- prefixed, so that it remains compatible -- with packages that have not yet migrated to swh.core 1.2 --- schema versions -create table if not exists dbversion -( - version int primary key, - release timestamptz, - description text -); - -comment on table dbversion is 'Details of current db version'; -comment on column dbversion.version is 'SQL schema version'; -comment on column dbversion.release is 'Version deployment timestamp'; -comment on column dbversion.description is 'Release description'; - -- swh module this db is storing data for create table if not exists dbmodule ( dbmodule text, diff --git a/swh/core/db/tests/conftest.py b/swh/core/db/tests/conftest.py --- a/swh/core/db/tests/conftest.py +++ b/swh/core/db/tests/conftest.py @@ -38,9 +38,11 @@ cli_runner.invoke(swhdb, ["init", module_name, "--dbname", conninfo]) """ - def get_sql_for_package_mock(modname): + def get_sql_for_package_mock(modname, upgrade=False): if modname.startswith("test."): sqldir = modname.split(".", 1)[1] + if upgrade: + sqldir += "/upgrades" return sorted( glob.glob(os.path.join(datadir, sqldir, "*.sql")), key=sortkey ) diff --git a/swh/core/db/tests/data/cli_new/upgrades/001.sql b/swh/core/db/tests/data/cli_new/upgrades/001.sql new file mode 100644 --- /dev/null +++ b/swh/core/db/tests/data/cli_new/upgrades/001.sql @@ -0,0 +1,5 @@ +-- this script should never be executed by an upgrade procedure (because +-- version 1 is set by 'swh db init') + +insert into origin(url, hash) +values ('this should never be executed', hash_sha1('')); diff --git a/swh/core/db/tests/data/cli_new/upgrades/002.sql b/swh/core/db/tests/data/cli_new/upgrades/002.sql new file mode 100644 --- /dev/null +++ b/swh/core/db/tests/data/cli_new/upgrades/002.sql @@ -0,0 +1,4 @@ +-- + +insert into origin(url, hash) +values ('version002', hash_sha1('version002')); diff --git a/swh/core/db/tests/data/cli_new/upgrades/003.sql b/swh/core/db/tests/data/cli_new/upgrades/003.sql new file mode 100644 --- /dev/null +++ b/swh/core/db/tests/data/cli_new/upgrades/003.sql @@ -0,0 +1,4 @@ +-- + +insert into origin(url, hash) +values ('version003', hash_sha1('version003')); diff --git a/swh/core/db/tests/data/cli_new/upgrades/004.sql b/swh/core/db/tests/data/cli_new/upgrades/004.sql new file mode 100644 --- /dev/null +++ b/swh/core/db/tests/data/cli_new/upgrades/004.sql @@ -0,0 +1,4 @@ +-- + +insert into origin(url, hash) +values ('version004', hash_sha1('version004')); diff --git a/swh/core/db/tests/data/cli_new/upgrades/005.sql b/swh/core/db/tests/data/cli_new/upgrades/005.sql new file mode 100644 --- /dev/null +++ b/swh/core/db/tests/data/cli_new/upgrades/005.sql @@ -0,0 +1,4 @@ +-- + +insert into origin(url, hash) +values ('version005', hash_sha1('version005')); diff --git a/swh/core/db/tests/data/cli_new/upgrades/006.sql b/swh/core/db/tests/data/cli_new/upgrades/006.sql new file mode 100644 --- /dev/null +++ b/swh/core/db/tests/data/cli_new/upgrades/006.sql @@ -0,0 +1,7 @@ +-- + +insert into origin(url, hash) +values ('version006', hash_sha1('version006')); + +insert into dbversion(version, release, description) +values (6, 'NOW()', 'Updated version from upgrade script'); diff --git a/swh/core/db/tests/test_cli.py b/swh/core/db/tests/test_cli.py --- a/swh/core/db/tests/test_cli.py +++ b/swh/core/db/tests/test_cli.py @@ -11,7 +11,7 @@ from swh.core.cli.db import db as swhdb from swh.core.db import BaseDb -from swh.core.db.db_utils import import_swhmodule +from swh.core.db.db_utils import import_swhmodule, swh_db_module from swh.core.tests.test_cli import assert_section_contains @@ -256,3 +256,107 @@ cur.execute("select * from origin") origins = cur.fetchall() assert len(origins) == 1 + + +def test_cli_swh_db_upgrade_new_api( + cli_runner, postgresql, mock_package_sql, mocker, tmp_path +): + """Create a db then initializing it should be ok for a "new style" datastore + + """ + module_name = "test.cli_new" + + from unittest.mock import MagicMock + + from swh.core.db.db_utils import import_swhmodule, swh_db_version + + current_version = 1 + + def import_swhmodule_mock(modname): + if modname.startswith("test."): + + def get_datastore(cls, **kw): + # XXX probably not the best way of doing this... + return MagicMock(get_current_version=lambda: current_version) + + return MagicMock(name=modname, get_datastore=get_datastore) + + return import_swhmodule(modname) + + mocker.patch("swh.core.db.db_utils.import_swhmodule", import_swhmodule_mock) + conninfo = craft_conninfo(postgresql) + + # This initializes the schema and data + cfgfile = tmp_path / "config.yml" + cfgfile.write_text( + f""" +{module_name}: + cls: postgresql + db: {conninfo} +""" + ) + result = cli_runner.invoke(swhdb, ["init-admin", module_name, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + result = cli_runner.invoke(swhdb, ["-C", cfgfile, "init", module_name]) + + import traceback + + assert ( + result.exit_code == 0 + ), f"Unexpected output: {traceback.print_tb(result.exc_info[2])}" + + assert swh_db_version(conninfo) == 1 + + # the upgrade should not do anything because the datastore does advertise + # version 1 + result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name]) + assert swh_db_version(conninfo) == 1 + + # advertise current version as 3, a simple upgrade should get us there, but + # no further + current_version = 3 + result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name]) + assert swh_db_version(conninfo) == 3 + + # an attempt to go further should not do anything + result = cli_runner.invoke( + swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 5] + ) + assert swh_db_version(conninfo) == 3 + # an attempt to go lower should not do anything + result = cli_runner.invoke( + swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 2] + ) + assert swh_db_version(conninfo) == 3 + + # advertise current version as 6, an upgrade with --to-version 4 should + # sticj to the given version 4 and no further + current_version = 6 + result = cli_runner.invoke( + swhdb, ["-C", cfgfile, "upgrade", module_name, "--to-version", 4] + ) + assert swh_db_version(conninfo) == 4 + assert "migration was not complete" in result.output + + # attempt to upgrade to a newer version than current code version fails + result = cli_runner.invoke( + swhdb, + ["-C", cfgfile, "upgrade", module_name, "--to-version", current_version + 1], + ) + assert result.exit_code != 0 + assert swh_db_version(conninfo) == 4 + + cnx = BaseDb.connect(conninfo) + with cnx.transaction() as cur: + cur.execute("drop table dbmodule") + assert swh_db_module(conninfo) is None + + # db migration should recreate the missing dbmodule table + result = cli_runner.invoke(swhdb, ["-C", cfgfile, "upgrade", module_name]) + assert result.exit_code == 0 + assert "Warning: the database does not have a dbmodule table." in result.output + assert ( + "Write the module information (test.cli_new) in the database? [Y/n]" + in result.output + ) + assert swh_db_module(conninfo) == module_name diff --git a/swh/core/db/tests/test_db_utils.py b/swh/core/db/tests/test_db_utils.py --- a/swh/core/db/tests/test_db_utils.py +++ b/swh/core/db/tests/test_db_utils.py @@ -1,10 +1,18 @@ from datetime import datetime, timedelta +from os import path import pytest from swh.core.cli.db import db as swhdb from swh.core.db import BaseDb -from swh.core.db.db_utils import get_database_info, now, swh_db_module, swh_db_versions +from swh.core.db.db_utils import ( + get_database_info, + now, + swh_db_module, + swh_db_upgrade, + swh_db_versions, + swh_set_db_module, +) from .test_cli import craft_conninfo @@ -71,3 +79,88 @@ if version > 10: assert desc == f"Upgrade to version {version}" assert (now() - ts) < timedelta(seconds=1) + + +@pytest.mark.parametrize("module", ["test.cli_new"]) +def test_db_utils_upgrade(cli_runner, postgresql, mock_package_sql, module, datadir): + """Check swh_db_upgrade + + """ + conninfo = craft_conninfo(postgresql) + result = cli_runner.invoke(swhdb, ["init-admin", module, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + result = cli_runner.invoke(swhdb, ["init", module, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + new_version = swh_db_upgrade(conninfo, module) + assert new_version == 6 + + versions = swh_db_versions(conninfo) + # get rid of dates to ease checking + versions = [(v[0], v[2]) for v in versions] + assert versions[-1] == (1, "DB initialization") + sqlbasedir = path.join(datadir, module.split(".", 1)[1], "upgrades") + + assert versions[1:-1] == [ + (i, f"Upgraded to version {i} using {sqlbasedir}/{i:03d}.sql") + for i in range(5, 1, -1) + ] + assert versions[0] == (6, "Updated version from upgrade script") + + cnx = BaseDb.connect(conninfo) + with cnx.transaction() as cur: + cur.execute("select url from origin where url like 'version%'") + result = cur.fetchall() + assert result == [("version%03d" % i,) for i in range(2, 7)] + cur.execute( + "select url from origin where url = 'this should never be executed'" + ) + result = cur.fetchall() + assert not result + + +@pytest.mark.parametrize("module", ["test.cli_new"]) +def test_db_utils_swh_db_upgrade_sanity_checks( + cli_runner, postgresql, mock_package_sql, module, datadir +): + """Check swh_db_upgrade + + """ + conninfo = craft_conninfo(postgresql) + result = cli_runner.invoke(swhdb, ["init-admin", module, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + result = cli_runner.invoke(swhdb, ["init", module, "--dbname", conninfo]) + assert result.exit_code == 0, f"Unexpected output: {result.output}" + + cnx = BaseDb.connect(conninfo) + with cnx.transaction() as cur: + cur.execute("drop table dbmodule") + + # try to upgrade with a unset module + with pytest.raises(ValueError): + swh_db_upgrade(conninfo, module) + + # check the dbmodule is unset + assert swh_db_module(conninfo) is None + + # set the stored module to something else + swh_set_db_module(conninfo, f"{module}2") + assert get_database_info(conninfo)[0] == f"{module}2" + + # try to upgrade with a different module + with pytest.raises(ValueError): + swh_db_upgrade(conninfo, module) + + # revert to the proper module in the db + swh_set_db_module(conninfo, module, force=True) + assert swh_db_module(conninfo) == module + # trying again is a noop + swh_set_db_module(conninfo, module) + assert swh_db_module(conninfo) == module + + # drop the dbversion table + with cnx.transaction() as cur: + cur.execute("drop table dbversion") + # an upgrade should fail due to missing stored version + with pytest.raises(ValueError): + swh_db_upgrade(conninfo, module)