def get_dburl_from_config(cfg):
    """Extract the database connection string from a configuration dict.

    Args:
        cfg: configuration mapping; its ``cls`` entry must be ``"postgresql"``.
            The ``db`` entry is read either from the mapping itself or, for
            backward compatibility, from its nested ``args`` mapping when
            that key is present.

    Returns:
        The value stored under ``db``, or None when there is no such entry.

    Raises:
        click.BadParameter: if ``cls`` is anything other than ``"postgresql"``.
    """
    backend = cfg.get("cls")
    if backend != "postgresql":
        raise click.BadParameter(
            "Configuration cls must be set to 'postgresql' for this command."
        )
    # Older configuration layouts nest the settings under "args"; when that
    # key exists it takes precedence over the top-level entries.
    settings = cfg["args"] if "args" in cfg else cfg
    return settings.get("db")
Union[str, pgconnection]) -> pgconnection: """Connect to the database passed in argument Args: @@ -56,7 +64,7 @@ Raises: psycopg2.Error if the database doesn't exist """ - if isinstance(db_or_conninfo, psycopg2.extensions.connection): + if isinstance(db_or_conninfo, pgconnection): return db_or_conninfo if "=" not in db_or_conninfo and "//" not in db_or_conninfo: @@ -68,9 +76,7 @@ return db -def swh_db_version( - db_or_conninfo: Union[str, psycopg2.extensions.connection] -) -> Optional[int]: +def swh_db_version(db_or_conninfo: Union[str, pgconnection]) -> Optional[int]: """Retrieve the swh version of the database. If the database is not initialized, this logs a warning and returns None. @@ -101,9 +107,7 @@ return None -def swh_db_flavor( - db_or_conninfo: Union[str, psycopg2.extensions.connection] -) -> Optional[str]: +def swh_db_flavor(db_or_conninfo: Union[str, pgconnection]) -> Optional[str]: """Retrieve the swh flavor of the database. If the database is not initialized, or the database doesn't support @@ -237,7 +241,7 @@ # there will be some decoding error because of stupid codec used, and Py3 # doesn't implement % on bytes. 
def import_swhmodule(modname: str):
    """Import the swh module designated by ``modname``.

    The ``swh.`` prefix is added when missing, so ``"storage"`` and
    ``"swh.storage"`` designate the same module.

    Args:
        modname: name of the module to import, with or without the ``swh.``
            prefix

    Returns:
        The imported module, or None (after logging an error) when the import
        raised ImportError.
    """
    if not modname.startswith("swh."):
        modname = f"swh.{modname}"
    try:
        return import_module(modname)
    except ImportError as exc:
        logger.error("Could not load the %s module: %s", modname, exc)
        return None


def get_sql_for_package(modname: str):
    """List the SQL files shipped in the ``sql/`` directory of a swh module.

    Args:
        modname: name of the module, with or without the ``swh.`` prefix

    Returns:
        The ``*.sql`` files found directly in the module's ``sql/`` directory,
        sorted with ``numfile_sortkey``.

    Raises:
        ValueError: if the module cannot be imported, or if it has no ``sql/``
            directory.
    """
    m = import_swhmodule(modname)
    if m is None:
        raise ValueError(f"Module {modname} cannot be loaded")

    # Namespace packages have no usable __file__; report them like any other
    # module without a schema instead of failing in path.dirname(None).
    module_file = getattr(m, "__file__", None)
    sqldir = path.join(path.dirname(module_file), "sql") if module_file else None
    if sqldir is None or not path.isdir(sqldir):
        raise ValueError(
            f"Module {modname} does not provide a db schema (no sql/ dir)"
        )
    return sorted(glob.glob(path.join(sqldir, "*.sql")), key=sortkey)


def populate_database_for_package(
    modname: str, conninfo: str, flavor: Optional[str] = None
) -> Tuple[bool, int, Optional[str]]:
    """Populate the database, pointed at with ``conninfo``,
    using the SQL files found in the package ``modname``.

    Nothing is executed when the database already reports a version. Files
    whose name contains ``-superuser-`` are skipped here; they are handled by
    :func:`init_admin_extensions`.

    Args:
      modname: Name of the module of which we're loading the files
      conninfo: connection info string for the SQL database
      flavor: the module-specific flavor which we want to initialize the database under

    Returns:
      Tuple with three elements: whether the database has been initialized; the current
      version of the database; if it exists, the flavor of the database.
    """
    current_version = swh_db_version(conninfo)
    if current_version is not None:
        # Already initialized: report the existing state and leave it alone.
        return False, current_version, swh_db_flavor(conninfo)

    sqlfiles = [
        fname for fname in get_sql_for_package(modname) if "-superuser-" not in fname
    ]
    execute_sqlfiles(sqlfiles, conninfo, flavor)

    current_version = swh_db_version(conninfo)
    # The SQL files that were just run are expected to record a version.
    assert current_version is not None
    return True, current_version, swh_db_flavor(conninfo)


def parse_dsn_or_dbname(dsn_or_dbname: str) -> Dict[str, str]:
    """Parse a psycopg2 dsn, falling back to supporting plain database names as well"""
    try:
        return _parse_dsn(dsn_or_dbname)
    except psycopg2.ProgrammingError:
        # psycopg2 failed to parse the DSN; it's probably a database name,
        # handle it as such
        return _parse_dsn(f"dbname={dsn_or_dbname}")


def init_admin_extensions(modname: str, conninfo: str) -> None:
    """Run the ``-superuser-`` SQL files of the package ``modname``.

    The files are executed using the given ``conninfo``, thus connecting to
    the target database itself.

    Args:
      modname: Name of the module of which we're loading the files
      conninfo: connection info string for the SQL database
    """
    sqlfiles = [
        fname for fname in get_sql_for_package(modname) if "-superuser-" in fname
    ]
    execute_sqlfiles(sqlfiles, conninfo)


def create_database_for_package(
    modname: str, conninfo: str, template: str = "template1"
):
    """Create the database pointed at with ``conninfo``, and initialize it using
    -superuser- SQL files found in the package ``modname``.

    Args:
      modname: Name of the module of which we're loading the files
      conninfo: connection info string or plain database name for the SQL database
      template: the name of the database to connect to and use as template to create
                the new database

    Raises:
      subprocess.CalledProcessError: if a ``psql`` invocation fails.
    """
    # Use the given conninfo string, but with dbname replaced by the template dbname
    # for the database creation step
    creation_dsn = parse_dsn_or_dbname(conninfo)
    dbname = creation_dsn["dbname"]
    creation_dsn["dbname"] = template
    logger.debug("db_create dbname=%s (from %s)", dbname, template)
    subprocess.check_call(
        [
            "psql",
            "--quiet",
            "--no-psqlrc",
            "-v",
            "ON_ERROR_STOP=1",
            "-d",
            make_dsn(**creation_dsn),
            "-c",
            f'CREATE DATABASE "{dbname}"',
        ]
    )
    # The superuser files must run inside the new database, hence the original
    # conninfo rather than the template one.
    init_admin_extensions(modname, conninfo)


def execute_sqlfiles(
    sqlfiles: Collection[str], conninfo: str, flavor: Optional[str] = None
):
    """Execute a list of SQL files on the database pointed at with ``conninfo``.

    Each file is run through its own ``psql`` invocation with
    ``ON_ERROR_STOP=1``. When a flavor is requested, it is recorded in the
    ``dbflavor`` table right after the first file whose name ends with
    ``-flavor.sql``.

    Args:
      sqlfiles: List of SQL files to execute
      conninfo: connection info string for the SQL database
      flavor: the database flavor to initialize

    Raises:
      subprocess.CalledProcessError: if a ``psql`` invocation fails.
    """
    psql_command = [
        "psql",
        "--quiet",
        "--no-psqlrc",
        "-v",
        "ON_ERROR_STOP=1",
        "-d",
        conninfo,
    ]

    flavor_set = False
    for sqlfile in sqlfiles:
        logger.debug("execute SQL file %s dbname=%s", sqlfile, conninfo)
        subprocess.check_call(psql_command + ["-f", sqlfile])

        if flavor is not None and not flavor_set and sqlfile.endswith("-flavor.sql"):
            logger.debug("Setting database flavor %s", flavor)
            # NOTE: the flavor is interpolated verbatim into the statement, so
            # it must be a trusted flavor name.
            query = f"insert into dbflavor (flavor) values ('{flavor}')"
            subprocess.check_call(psql_command + ["-c", query])
            flavor_set = True

    if flavor is not None and not flavor_set:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(
            "Asked for flavor %s, but module does not support database flavors",
            flavor,
        )
swh.core.utils import numfile_sortkey as sortkey - mock_sql_files = mocker.patch("swh.core.cli.db.get_sql_for_package") + mock_sql_files = mocker.patch("swh.core.db.db_utils.get_sql_for_package") sql_files = sorted(glob.glob(path.join(datadir, "cli", "*.sql")), key=sortkey) mock_sql_files.return_value = sql_files return mock_sql_files diff --git a/swh/core/sql/log-schema.sql b/swh/core/sql/log-schema.sql deleted file mode 100644 --- a/swh/core/sql/log-schema.sql +++ /dev/null @@ -1,33 +0,0 @@ ---- ---- logging data model ---- - -create table dbversion -( - version int primary key, - release timestamptz, - description text -); - -insert into dbversion(version, release, description) - values(1, now(), 'Work In Progress'); - - -create type log_level as enum ('debug', 'info', 'warning', 'error', 'critical'); - -create table log -( - id bigserial primary key, - ts timestamptz not null default now(), - level log_level not null default 'info', -- importance - message text not null, -- human readable message - data jsonb, -- extra data; when NOT NULL, must contain a key "type" - -- denoting the kind of message within src_module - src_module text, -- fully-qualified source module, e.g., "swh.loader.git" - src_host text, -- FQDN source hostname, e.g., "worker03.softwareheritage.org" - src_pid int -- originating PID, relative to src_host -); - -create index on log (ts); -create index on log (src_module); -create index on log (src_host);