diff --git a/swh/core/cli/db.py b/swh/core/cli/db.py
--- a/swh/core/cli/db.py
+++ b/swh/core/cli/db.py
@@ -6,7 +6,7 @@
 
 import logging
 from os import environ, path
-from typing import Collection, Tuple
+from typing import Collection, Optional, Tuple
 import warnings
 
 import click
@@ -92,7 +92,10 @@
     default="softwareheritage-dev",
     show_default=True,
 )
-def db_init(module, db_name):
+@click.option(
+    "--flavor", help="Database flavor.", default=None,
+)
+def db_init(module, db_name, flavor):
     """Initialize a database for the Software Heritage .
 
     Example:
@@ -103,28 +106,41 @@
     please provide them using standard environment variables.
     See psql(1) man page (section ENVIRONMENTS) for details.
 
-    Example:
+    Examples:
 
-      PGPORT=5434 swh db-init indexer
+      PGPORT=5434 swh db init indexer
       swh db init -d postgresql://user:passwd@pghost:5433/swh-storage storage
+      swh db init --flavor read_replica -d swh-storage storage
 
     """
 
-    logger.debug("db_init %s dn_name=%s", module, db_name)
+    logger.debug("db_init %s flavor=%s db_name=%s", module, flavor, db_name)
 
-    initialized, dbversion = populate_database_for_package(module, db_name)
+    initialized, dbversion, dbflavor = populate_database_for_package(
+        module, db_name, flavor
+    )
 
     # TODO: Ideally migrate the version from db_version to the latest
     # db version
 
     click.secho(
-        "DONE database for {} {} at version {}".format(
-            module, "initialized" if initialized else "exists", dbversion
+        "DONE database for {} {}{} at version {}".format(
+            module,
+            "initialized" if initialized else "exists",
+            f" (flavor {dbflavor})" if dbflavor is not None else "",
+            dbversion,
         ),
         fg="green",
         bold=True,
     )
 
+    if flavor is not None and dbflavor != flavor:
+        click.secho(
+            f"WARNING requested flavor '{flavor}' != recorded flavor '{dbflavor}'",
+            fg="red",
+            bold=True,
+        )
+
 
 def get_sql_for_package(modname):
     import glob
@@ -147,30 +163,35 @@
     return sorted(glob.glob(path.join(sqldir, "*.sql")), key=sortkey)
 
 
-def populate_database_for_package(modname: str, conninfo: str) -> Tuple[bool, int]:
+def populate_database_for_package(
+    modname: str, conninfo: str, flavor: Optional[str] = None
+) -> Tuple[bool, int, Optional[str]]:
     """Populate the database, pointed at with `conninfo`, using the SQL files found in
     the package `modname`.
 
     Args:
       modname: Name of the module of which we're loading the files
       conninfo: connection info string for the SQL database
+      flavor: the module-specific flavor which we want to initialize the database under
     Returns:
-      Tuple with two elements: whether the database has been initialized; the current
-        version of the database.
+      Tuple with three elements: whether the database has been initialized; the current
+        version of the database; if it exists, the flavor of the database.
     """
-    from swh.core.db.db_utils import swh_db_version
+    from swh.core.db.db_utils import swh_db_flavor, swh_db_version
 
     current_version = swh_db_version(conninfo)
     if current_version is not None:
-        return False, current_version
+        dbflavor = swh_db_flavor(conninfo)
+        return False, current_version, dbflavor
 
     sqlfiles = get_sql_for_package(modname)
     sqlfiles = [fname for fname in sqlfiles if "-superuser-" not in fname]
-    execute_sqlfiles(sqlfiles, conninfo)
+    execute_sqlfiles(sqlfiles, conninfo, flavor)
 
     current_version = swh_db_version(conninfo)
     assert current_version is not None
-    return True, current_version
+    dbflavor = swh_db_flavor(conninfo)
+    return True, current_version, dbflavor
 
 
 def create_database_for_package(
@@ -218,27 +239,42 @@
     execute_sqlfiles(sqlfiles, conninfo)
 
 
-def execute_sqlfiles(sqlfiles: Collection[str], conninfo: str):
+def execute_sqlfiles(
+    sqlfiles: Collection[str], conninfo: str, flavor: Optional[str] = None
+):
     """Execute a list of SQL files on the database pointed at with `conninfo`.
 
     Args:
       sqlfiles: List of SQL files to execute
       conninfo: connection info string for the SQL database
+      flavor: the database flavor to initialize
     """
     import subprocess
 
+    psql_command = [
+        "psql",
+        "--quiet",
+        "--no-psqlrc",
+        "-v",
+        "ON_ERROR_STOP=1",
+        "-d",
+        conninfo,
+    ]
+
+    flavor_set = False
     for sqlfile in sqlfiles:
         logger.debug(f"execute SQL file {sqlfile} db_name={conninfo}")
-        subprocess.check_call(
-            [
-                "psql",
-                "--quiet",
-                "--no-psqlrc",
-                "-v",
-                "ON_ERROR_STOP=1",
-                "-d",
-                conninfo,
-                "-f",
-                sqlfile,
-            ]
+        subprocess.check_call(psql_command + ["-f", sqlfile])
+
+        if flavor is not None and not flavor_set and sqlfile.endswith("-flavor.sql"):
+            logger.debug("Setting database flavor %s", flavor)
+            # Double embedded single quotes so the flavor stays a single SQL literal
+            escaped_flavor = flavor.replace("'", "''")
+            query = f"insert into dbflavor (flavor) values ('{escaped_flavor}')"
+            subprocess.check_call(psql_command + ["-c", query])
+            flavor_set = True
+
+    if flavor is not None and not flavor_set:
+        logger.warning(
+            "Asked for flavor %s, but module does not support database flavors", flavor,
         )
diff --git a/swh/core/db/db_utils.py b/swh/core/db/db_utils.py
--- a/swh/core/db/db_utils.py
+++ b/swh/core/db/db_utils.py
@@ -42,32 +42,51 @@
     return value
 
 
-def swh_db_version(
+def connect_to_conninfo(
     db_or_conninfo: Union[str, psycopg2.extensions.connection]
-) -> Optional[int]:
-    """Retrieve the swh version if any. In case of the db not initialized,
-    this returns None. Otherwise, this returns the db's version.
+) -> psycopg2.extensions.connection:
+    """Connect to the database passed in argument
 
     Args:
         db_or_conninfo: A database connection, or a database connection info string
 
     Returns:
-        Optional[Int]: Either the db's version or None
+        a connected database handle
 
+    Raises:
+        psycopg2.Error if the database doesn't exist
     """
-
     if isinstance(db_or_conninfo, psycopg2.extensions.connection):
-        db = db_or_conninfo
-    else:
-        try:
-            if "=" not in db_or_conninfo:
-                # Database name
-                db_or_conninfo = f"dbname={db_or_conninfo}"
-            db = psycopg2.connect(db_or_conninfo)
-        except psycopg2.Error:
-            logger.exception("Failed to connect to `%s`", db_or_conninfo)
-            # Database not initialized
-            return None
+        return db_or_conninfo
+
+    if "=" not in db_or_conninfo and "//" not in db_or_conninfo:
+        # Database name
+        db_or_conninfo = f"dbname={db_or_conninfo}"
+
+    db = psycopg2.connect(db_or_conninfo)
+
+    return db
+
+
+def swh_db_version(
+    db_or_conninfo: Union[str, psycopg2.extensions.connection]
+) -> Optional[int]:
+    """Retrieve the swh version of the database.
+
+    If the database is not initialized, this logs a warning and returns None.
+
+    Args:
+        db_or_conninfo: A database connection, or a database connection info string
+
+    Returns:
+        Either the version of the database, or None if it couldn't be detected
+    """
+    try:
+        db = connect_to_conninfo(db_or_conninfo)
+    except psycopg2.Error:
+        logger.exception("Failed to connect to `%s`", db_or_conninfo)
+        # Database not initialized
+        return None
 
     try:
         with db.cursor() as c:
@@ -82,6 +101,41 @@
         return None
 
 
+def swh_db_flavor(
+    db_or_conninfo: Union[str, psycopg2.extensions.connection]
+) -> Optional[str]:
+    """Retrieve the swh flavor of the database.
+
+    If the database is not initialized, or the database doesn't support
+    flavors, this returns None.
+
+    Args:
+        db_or_conninfo: A database connection, or a database connection info string
+
+    Returns:
+        The flavor of the database, or None if it could not be detected.
+    """
+    try:
+        db = connect_to_conninfo(db_or_conninfo)
+    except psycopg2.Error:
+        logger.exception("Failed to connect to `%s`", db_or_conninfo)
+        # Database not initialized
+        return None
+
+    try:
+        with db.cursor() as c:
+            query = "select swh_get_dbflavor()"
+            try:
+                c.execute(query)
+                return c.fetchone()[0]
+            except psycopg2.errors.UndefinedFunction:
+                # function not found: no flavor
+                return None
+    except Exception:
+        logger.exception("Could not get flavor from `%s`", db_or_conninfo)
+        return None
+
+
 # The following code has been imported from psycopg2, version 2.7.4,
 # https://github.com/psycopg/psycopg2/tree/5afb2ce803debea9533e293eef73c92ffce95bcd
 # and modified by Software Heritage.