diff --git a/swh/provenance/sql/15-flavor.sql b/swh/provenance/sql/15-flavor.sql new file mode 100644 index 0000000..b270d0b --- /dev/null +++ b/swh/provenance/sql/15-flavor.sql @@ -0,0 +1,21 @@ +-- database flavor +create type database_flavor as enum ( + 'with-path', + 'without-path' +); +comment on type database_flavor is 'Flavor of the current database'; + +create table dbflavor ( + flavor database_flavor, + single_row char(1) primary key default 'x', + check (single_row = 'x') +); +comment on table dbflavor is 'Database flavor storage'; +comment on column dbflavor.flavor is 'Database flavor currently deployed'; +comment on column dbflavor.single_row is 'Bogus column to force the table to have a single row'; + +create or replace function swh_get_dbflavor() returns database_flavor language sql stable as $$ + select coalesce((select flavor from dbflavor), 'with-path'); +$$; + +comment on function swh_get_dbflavor is 'Get the flavor of the database currently deployed'; diff --git a/swh/provenance/sql/30-schema.sql b/swh/provenance/sql/30-schema.sql index 3c33f99..f4a3591 100644 --- a/swh/provenance/sql/30-schema.sql +++ b/swh/provenance/sql/30-schema.sql @@ -1,68 +1,152 @@ +-- psql variables to get the current database flavor +select swh_get_dbflavor() = 'with-path' as dbflavor_with_path \gset + + +create table dbversion +( + version int primary key, + release timestamptz, + description text +); + +comment on table dbversion is 'Details of current db version'; +comment on column dbversion.version is 'SQL schema version'; +comment on column dbversion.release is 'Version deployment timestamp'; +comment on column dbversion.description is 'Release description'; + +-- latest schema version +insert into dbversion(version, release, description) + values(1, now(), 'Work In Progress'); + -- a Git object ID, i.e., a Git-style salted SHA1 checksum create domain sha1_git as bytea check (length(value) = 20); -- UNIX path (absolute, relative, individual path component, etc.) create domain unix_path as bytea; create table content ( id bigserial primary key, -- internal identifier of the content blob sha1 sha1_git unique not null, -- intrinsic identifier of the content blob date timestamptz not null -- timestamp of the revision where the blob appears early ); comment on column content.id is 'Content internal identifier'; comment on column content.sha1 is 'Content intrinsic identifier'; comment on column content.date is 'Earliest timestamp for the content (first seen time)'; +create table content_early_in_rev +( + blob bigint not null, -- internal identifier of the content blob + rev bigint not null -- internal identifier of the revision where the blob appears for the first time +\if :dbflavor_with_path + , + loc bigint not null -- location of the content relative to the revision root directory +\endif + -- foreign key (blob) references content (id), + -- foreign key (rev) references revision (id), + -- foreign key (loc) references location (id) +); +comment on column content_early_in_rev.blob is 'Content internal identifier'; +comment on column content_early_in_rev.rev is 'Revision internal identifier'; +\if :dbflavor_with_path +comment on column content_early_in_rev.loc is 'Location of content in revision'; +\endif + +create table content_in_dir +( + blob bigint not null, -- internal identifier of the content blob + dir bigint not null -- internal identifier of the directory containing the blob +\if :dbflavor_with_path + , + loc bigint not null -- location of the content relative to its parent directory in the isochrone frontier +\endif + -- foreign key (blob) references content (id), + -- foreign key (dir) references directory (id), + -- foreign key (loc) references location (id) +); +comment on column content_in_dir.blob is 'Content internal identifier'; +comment on column content_in_dir.dir is 'Directory internal identifier'; +\if :dbflavor_with_path +comment on column content_in_dir.loc is 'Location of content in directory'; +\endif + create table directory ( id bigserial primary key, -- internal identifier of the directory appearing in an isochrone inner frontier sha1 sha1_git unique not null, -- intrinsic identifier of the directory date timestamptz not null -- max timestamp among those of the directory children's ); comment on column directory.id is 'Directory internal identifier'; comment on column directory.sha1 is 'Directory intrinsic identifier'; comment on column directory.date is 'Latest timestamp for the content in the directory'; +create table directory_in_rev +( + dir bigint not null, -- internal identifier of the directory appearing in the revision + rev bigint not null -- internal identifier of the revision containing the directory +\if :dbflavor_with_path + , + loc bigint not null -- location of the directory relative to the revision root directory +\endif + -- foreign key (dir) references directory (id), + -- foreign key (rev) references revision (id), + -- foreign key (loc) references location (id) +); +comment on column directory_in_rev.dir is 'Directory internal identifier'; +comment on column directory_in_rev.rev is 'Revision internal identifier'; +\if :dbflavor_with_path +comment on column directory_in_rev.loc is 'Location of directory in revision'; +\endif + create table origin ( id bigserial primary key, -- internal identifier of the origin url unix_path unique not null -- url of the origin ); comment on column origin.id is 'Origin internal identifier'; comment on column origin.url is 'URL of the origin'; create table revision ( id bigserial primary key, -- internal identifier of the revision sha1 sha1_git unique not null, -- intrinsic identifier of the revision date timestamptz not null, -- timestamp of the revision org bigint -- id of the preferred origin -- foreign key (org) references origin (id) ); comment on column revision.id is 'Revision internal identifier'; comment on column revision.sha1 is 'Revision intrinsic identifier'; comment on column revision.date is 'Revision timestamp'; comment on column revision.org is 'preferred origin for the revision'; create table revision_before_rev ( prev bigserial not null, -- internal identifier of the source revision next bigserial not null, -- internal identifier of the destination revision primary key (prev, next) -- foreign key (prev) references revision (id), -- foreign key (next) references revision (id) ); comment on column revision_before_rev.prev is 'Source revision internal identifier'; comment on column revision_before_rev.next is 'Destination revision internal identifier'; create table revision_in_org ( rev bigint not null, -- internal identifier of the revision poined by the origin org bigint not null, -- internal identifier of the origin that points to the revision primary key (rev, org) -- foreign key (rev) references revision (id), -- foreign key (org) references origin (id) ); comment on column revision_in_org.rev is 'Revision internal identifier'; comment on column revision_in_org.org is 'Origin internal identifier'; + +\if :dbflavor_with_path +create table location +( + id bigserial primary key, -- internal identifier of the location + path unix_path unique not null -- path to the location +); +comment on column location.id is 'Location internal identifier'; +comment on column location.path is 'Path to the location'; +\endif diff --git a/swh/provenance/sql/35-schema-with-path-flavor.sql b/swh/provenance/sql/35-schema-with-path-flavor.sql deleted file mode 100644 index e28bb3e..0000000 --- a/swh/provenance/sql/35-schema-with-path-flavor.sql +++ /dev/null @@ -1,46 +0,0 @@ -create table content_early_in_rev -( - blob bigint not null, -- internal identifier of the content blob - rev bigint not null, -- internal identifier of the revision where the blob appears for the first time - loc bigint not null -- location of the content relative to the revision root directory - -- foreign key (blob) references content (id), - -- foreign key (rev) references revision (id), - -- foreign key (loc) references location (id) -); -comment on column content_early_in_rev.blob is 'Content internal identifier'; -comment on column content_early_in_rev.rev is 'Revision internal identifier'; -comment on column content_early_in_rev.loc is 'Location of content in revision'; - -create table content_in_dir -( - blob bigint not null, -- internal identifier of the content blob - dir bigint not null, -- internal identifier of the directory containing the blob - loc bigint not null -- location of the content relative to its parent directory in the isochrone frontier - -- foreign key (blob) references content (id), - -- foreign key (dir) references directory (id), - -- foreign key (loc) references location (id) -); -comment on column content_in_dir.blob is 'Content internal identifier'; -comment on column content_in_dir.dir is 'Directory internal identifier'; -comment on column content_in_dir.loc is 'Location of content in directory'; - -create table directory_in_rev -( - dir bigint not null, -- internal identifier of the directory appearing in the revision - rev bigint not null, -- internal identifier of the revision containing the directory - loc bigint not null -- location of the directory relative to the revision root directory - -- foreign key (dir) references directory (id), - -- foreign key (rev) references revision (id), - -- foreign key (loc) references location (id) -); -comment on column directory_in_rev.dir is 'Directory internal identifier'; -comment on column directory_in_rev.rev is 'Revision internal identifier'; -comment on column directory_in_rev.loc is 'Location of directory in revision'; - -create table location -( - id bigserial primary key, -- internal identifier of the location - path unix_path unique not null -- path to the location -); -comment on column location.id is 'Location internal identifier'; -comment on column location.path is 'Path to the location'; diff --git a/swh/provenance/sql/35-schema-without-path-flavor.sql b/swh/provenance/sql/35-schema-without-path-flavor.sql deleted file mode 100644 index 9222595..0000000 --- a/swh/provenance/sql/35-schema-without-path-flavor.sql +++ /dev/null @@ -1,29 +0,0 @@ -create table content_early_in_rev -( - blob bigint not null, -- internal identifier of the content blob - rev bigint not null -- internal identifier of the revision where the blob appears for the first time - -- foreign key (blob) references content (id), - -- foreign key (rev) references revision (id) -); -comment on column content_early_in_rev.blob is 'Content internal identifier'; -comment on column content_early_in_rev.rev is 'Revision internal identifier'; - -create table content_in_dir -( - blob bigint not null, -- internal identifier of the content blob - dir bigint not null -- internal identifier of the directory containing the blob - -- foreign key (blob) references content (id), - -- foreign key (dir) references directory (id) -); -comment on column content_in_dir.blob is 'Content internal identifier'; -comment on column content_in_dir.dir is 'Directory internal identifier'; - -create table directory_in_rev -( - dir bigint not null, -- internal identifier of the directory appearing in the revision - rev bigint not null -- internal identifier of the revision containing the directory - -- foreign key (dir) references directory (id), - -- foreign key (rev) references revision (id) -); -comment on column directory_in_rev.dir is 'Directory internal identifier'; -comment on column directory_in_rev.rev is 'Revision internal identifier'; diff --git a/swh/provenance/sql/60-indexes-with-path-flavor.sql b/swh/provenance/sql/60-indexes-with-path-flavor.sql deleted file mode 100644 index 9190b7c..0000000 --- a/swh/provenance/sql/60-indexes-with-path-flavor.sql +++ /dev/null @@ -1,3 +0,0 @@ -alter table content_early_in_rev add primary key (blob, rev, loc); -alter table content_in_dir add primary key (blob, dir, loc); -alter table directory_in_rev add primary key (dir, rev, loc); diff --git a/swh/provenance/sql/60-indexes-without-path-flavor.sql b/swh/provenance/sql/60-indexes-without-path-flavor.sql deleted file mode 100644 index daf925c..0000000 --- a/swh/provenance/sql/60-indexes-without-path-flavor.sql +++ /dev/null @@ -1,3 +0,0 @@ -alter table content_early_in_rev add primary key (blob, rev); -alter table content_in_dir add primary key (blob, dir); -alter table directory_in_rev add primary key (dir, rev); diff --git a/swh/provenance/sql/60-indexes.sql b/swh/provenance/sql/60-indexes.sql new file mode 100644 index 0000000..a3dd4e0 --- /dev/null +++ b/swh/provenance/sql/60-indexes.sql @@ -0,0 +1,12 @@ +-- psql variables to get the current database flavor +select swh_get_dbflavor() = 'with-path' as dbflavor_with_path \gset + +\if :dbflavor_with_path +alter table content_early_in_rev add primary key (blob, rev, loc); +alter table content_in_dir add primary key (blob, dir, loc); +alter table directory_in_rev add primary key (dir, rev, loc); +\else +alter table content_early_in_rev add primary key (blob, rev); +alter table content_in_dir add primary key (blob, dir); +alter table directory_in_rev add primary key (dir, rev); +\endif diff --git a/swh/provenance/tests/test_cli.py b/swh/provenance/tests/test_cli.py index 51465fa..e0f332b 100644 --- a/swh/provenance/tests/test_cli.py +++ b/swh/provenance/tests/test_cli.py @@ -1,42 +1,111 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from click.testing import CliRunner +import psycopg2 +import pytest import yaml from swh.core.cli import swh as swhmain +import swh.core.cli.db # noqa ; ensure cli is loaded import swh.provenance.cli # noqa ; ensure cli is loaded def test_cli_swh_db_help(): # swhmain.add_command(provenance_cli) result = CliRunner().invoke(swhmain, ["provenance", "-h"]) assert result.exit_code == 0 assert "Commands:" in result.output commands = result.output.split("Commands:")[1] for command in ( "create", "find-all", "find-first", "iter-origins", "iter-revisions", ): assert f" {command} " in commands def test_cli_create_deprecated(provenance_db, tmp_path): conffile = tmp_path / "config.yml" conf = { - "provenance": { - "cls": "local", - "with_path": True, - }, + "provenance": {"cls": "local", "with_path": True,}, } yaml.dump(conf, conffile.open("w")) result = CliRunner().invoke( swhmain, ["provenance", "--config-file", str(conffile), "create", "--drop"] ) assert result.exit_code == 0, result.output assert "DeprecationWarning" in result.output + + +TABLES = { + "dbflavor", + "dbversion", + "content", + "content_early_in_rev", + "content_in_dir", + "directory", + "directory_in_rev", + "origin", + "revision", + "revision_before_rev", + "revision_in_org", +} + + +@pytest.mark.parametrize( + "flavor, dbtables", (("with-path", TABLES | {"location"}), ("without-path", TABLES)) +) +def test_cli_db_create_and_init_db_with_flavor( + monkeypatch, postgresql, flavor, dbtables +): + """Test that 'swh db init provenance' works with flavors + + for both with-path and without-path flavors""" + + dbname = f"{flavor}-db" + + # DB creation using 'swh db create' + db_params = postgresql.get_dsn_parameters() + monkeypatch.setenv("PGHOST", db_params["host"]) + monkeypatch.setenv("PGUSER", db_params["user"]) + monkeypatch.setenv("PGPORT", db_params["port"]) + result = CliRunner().invoke(swhmain, ["db", "create", "-d", dbname, "provenance"]) + assert result.exit_code == 0, result.output + + # DB init using 'swh db init' + result = CliRunner().invoke( + swhmain, ["db", "init", "-d", dbname, "--flavor", flavor, "provenance"] + ) + assert result.exit_code == 0, result.output + assert f"(flavor {flavor})" in result.output + + db_params["dbname"] = dbname + cnx = psycopg2.connect(**db_params) + # check the DB looks OK (check for db_flavor and expected tables) + with cnx.cursor() as cur: + cur.execute("select swh_get_dbflavor()") + assert cur.fetchone() == (flavor,) + + cur.execute( + "select table_name from information_schema.tables " + "where table_schema = 'public' " + f"and table_catalog = '{dbname}'" + ) + tables = set(x for (x,) in cur.fetchall()) + assert tables == dbtables + + +def test_cli_init_db_default_flavor(provenance_db): + "Test that 'swh db init provenance' defaults to a with-path flavored DB" + dbname = provenance_db.dsn + result = CliRunner().invoke(swhmain, ["db", "init", "-d", dbname, "provenance"]) + assert result.exit_code == 0, result.output + + with provenance_db.cursor() as cur: + cur.execute("select swh_get_dbflavor()") + assert cur.fetchone() == ("with-path",)