diff --git a/sql/upgrades/163.sql b/sql/upgrades/163.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/163.sql @@ -0,0 +1,29 @@ +-- SWH DB schema upgrade +-- from_version: 162 +-- to_version: 163 +-- description: Register database flavor scaffolding + +insert into dbversion(version, release, description) + values(163, now(), 'Work In Progress'); +-- database flavor +create type database_flavor as enum ( + 'default', -- default: full index availability for deduplication and read queries + 'mirror', -- mirror: reduced indexes to allow for out of order insertions + 'read_replica' -- read replica: minimal indexes to allow read queries +); +comment on type database_flavor is 'Flavor of the current database'; + +create table dbflavor ( + flavor database_flavor, + single_row char(1) primary key default 'x', + check (single_row = 'x') +); +comment on table dbflavor is 'Database flavor storage'; +comment on column dbflavor.flavor is 'Database flavor currently deployed'; +comment on column dbflavor.single_row is 'Bogus column to force the table to have a single row'; + +create or replace function swh_get_dbflavor() returns database_flavor language sql stable as $$ + select coalesce((select flavor from dbflavor), 'default'); +$$; + +comment on function swh_get_dbflavor is 'Get the flavor of the database currently deployed'; diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py --- a/swh/storage/postgresql/db.py +++ b/swh/storage/postgresql/db.py @@ -28,7 +28,7 @@ """ - current_version = 162 + current_version = 163 def mktemp_dir_entry(self, entry_type, cur=None): self._cursor(cur).execute( diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py --- a/swh/storage/pytest_plugin.py +++ b/swh/storage/pytest_plugin.py @@ -5,6 +5,8 @@ import glob from os import environ, path + +import subprocess from typing import Union import pytest @@ -109,16 +111,24 @@ self.dump_files = sorted(glob.glob(dump_files), key=sortkey) def db_setup(self): - with psycopg2.connect( - dbname=self.db_name, user=self.user, host=self.host, port=self.port, - ) as cnx: - with cnx.cursor() as cur: - for fname in self.dump_files: - with open(fname) as fobj: - sql = fobj.read().replace("concurrently", "").strip() - if sql: - cur.execute(sql) - cnx.commit() + conninfo = ( + f"host={self.host} user={self.user} port={self.port} dbname={self.db_name}" + ) + + for fname in self.dump_files: + subprocess.check_call( + [ + "psql", + "--quiet", + "--no-psqlrc", + "-v", + "ON_ERROR_STOP=1", + "-d", + conninfo, + "-f", + fname, + ] + ) def db_reset(self): with psycopg2.connect( diff --git a/swh/storage/sql/15-flavor.sql b/swh/storage/sql/15-flavor.sql new file mode 100644 --- /dev/null +++ b/swh/storage/sql/15-flavor.sql @@ -0,0 +1,22 @@ +-- database flavor +create type database_flavor as enum ( + 'default', -- default: full index availability for deduplication and read queries + 'mirror', -- mirror: reduced indexes to allow for out of order insertions + 'read_replica' -- read replica: minimal indexes to allow read queries +); +comment on type database_flavor is 'Flavor of the current database'; + +create table dbflavor ( + flavor database_flavor, + single_row char(1) primary key default 'x', + check (single_row = 'x') +); +comment on table dbflavor is 'Database flavor storage'; +comment on column dbflavor.flavor is 'Database flavor currently deployed'; +comment on column dbflavor.single_row is 'Bogus column to force the table to have a single row'; + +create or replace function swh_get_dbflavor() returns database_flavor language sql stable as $$ + select coalesce((select flavor from dbflavor), 'default'); +$$; + +comment on function swh_get_dbflavor is 'Get the flavor of the database currently deployed'; diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -17,7 +17,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(162, now(), 'Work In Progress'); + values(163, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql --- a/swh/storage/sql/60-indexes.sql +++ b/swh/storage/sql/60-indexes.sql @@ -1,28 +1,49 @@ +-- psql variables to get the current database flavor + +select swh_get_dbflavor() = 'read_replica' as dbflavor_read_replica \gset +select swh_get_dbflavor() != 'read_replica' as dbflavor_does_deduplication \gset +select swh_get_dbflavor() = 'mirror' as dbflavor_mirror \gset +select swh_get_dbflavor() = 'default' as dbflavor_default \gset + -- content create unique index concurrently content_pkey on content(sha1); -create unique index concurrently on content(sha1_git); +alter table content add primary key using index content_pkey; + +\if :dbflavor_does_deduplication + create unique index concurrently on content(sha1_git); +\else + create index concurrently on content(sha1_git); +\endif + create index concurrently on content(sha256); create index concurrently on content(blake2s256); -create index concurrently on content(ctime); -- TODO use a BRIN index here (postgres >= 9.5) -create unique index concurrently on content(object_id); - -alter table content add primary key using index content_pkey; +\if :dbflavor_default + create unique index concurrently on content(object_id); -- to be reviewed + create index concurrently on content(ctime); -- to be reviewed +\endif -- origin create unique index concurrently origin_pkey on origin(id); -create unique index concurrently on origin using btree(url); +alter table origin add primary key using index origin_pkey; + +\if :dbflavor_does_deduplication + create unique index concurrently on origin using btree(url); +\else + create index concurrently on origin using btree(url); +\endif + create index concurrently on origin using gin (url gin_trgm_ops); create index concurrently on origin using btree(digest(url, 'sha1')); -alter table origin add primary key using index origin_pkey; - -- skipped_content -alter table skipped_content add constraint skipped_content_sha1_sha1_git_sha256_key unique (sha1, sha1_git, sha256); +\if :dbflavor_does_deduplication + alter table skipped_content add constraint skipped_content_sha1_sha1_git_sha256_key unique (sha1, sha1_git, sha256); +\endif create index concurrently on skipped_content(sha1); create index concurrently on skipped_content(sha1_git); @@ -30,174 +51,228 @@ create index concurrently on skipped_content(blake2s256); create unique index concurrently on skipped_content(object_id); -alter table skipped_content add constraint skipped_content_origin_fkey foreign key (origin) references origin(id) not valid; -alter table skipped_content validate constraint skipped_content_origin_fkey; +\if :dbflavor_default + alter table skipped_content add constraint skipped_content_origin_fkey foreign key (origin) references origin(id) not valid; + alter table skipped_content validate constraint skipped_content_origin_fkey; +\endif -- directory - create unique index concurrently directory_pkey on directory(id); alter table directory add primary key using index directory_pkey; -create index concurrently on directory using gin (dir_entries); -create index concurrently on directory using gin (file_entries); -create index concurrently on directory using gin (rev_entries); -create unique index concurrently on directory(object_id); +\if :dbflavor_default + create index concurrently on directory using gin (dir_entries); -- to be reviewed + create index concurrently on directory using gin (file_entries); -- to be reviewed + create index concurrently on directory using gin (rev_entries); -- to be reviewed + create unique index concurrently on directory(object_id); -- to be reviewed +\endif -- directory_entry_dir create unique index concurrently directory_entry_dir_pkey on directory_entry_dir(id); alter table directory_entry_dir add primary key using index directory_entry_dir_pkey; -create unique index concurrently on directory_entry_dir(target, name, perms); +\if :dbflavor_does_deduplication + create unique index concurrently on directory_entry_dir(target, name, perms); +\endif -- directory_entry_file create unique index concurrently directory_entry_file_pkey on directory_entry_file(id); alter table directory_entry_file add primary key using index directory_entry_file_pkey; -create unique index concurrently on directory_entry_file(target, name, perms); +\if :dbflavor_does_deduplication + create unique index concurrently on directory_entry_file(target, name, perms); +\endif -- directory_entry_rev create unique index concurrently directory_entry_rev_pkey on directory_entry_rev(id); alter table directory_entry_rev add primary key using index directory_entry_rev_pkey; -create unique index concurrently on directory_entry_rev(target, name, perms); +\if :dbflavor_does_deduplication + create unique index concurrently on directory_entry_rev(target, name, perms); +\endif + -- person create unique index concurrently person_pkey on person(id); alter table person add primary key using index person_pkey; -create unique index concurrently on person(fullname); -create index concurrently on person(name); -create index concurrently on person(email); +\if :dbflavor_does_deduplication + create unique index concurrently on person(fullname); +\else + create index concurrently on person(fullname); -- to be reviewed +\endif + +\if :dbflavor_default + create index concurrently on person(name); -- to be reviewed + create index concurrently on person(email); -- to be reviewed +\endif -- revision create unique index concurrently revision_pkey on revision(id); alter table revision add primary key using index revision_pkey; -alter table revision add constraint revision_author_fkey foreign key (author) references person(id) not valid; -alter table revision validate constraint revision_author_fkey; -alter table revision add constraint revision_committer_fkey foreign key (committer) references person(id) not valid; -alter table revision validate constraint revision_committer_fkey; +\if :dbflavor_does_deduplication + alter table revision add constraint revision_author_fkey foreign key (author) references person(id) not valid; + alter table revision validate constraint revision_author_fkey; + alter table revision add constraint revision_committer_fkey foreign key (committer) references person(id) not valid; + alter table revision validate constraint revision_committer_fkey; -alter table revision + alter table revision add constraint revision_date_neg_utc_offset_not_null check (date is null or date_neg_utc_offset is not null) not valid; -alter table revision + alter table revision add constraint revision_committer_date_neg_utc_offset_not_null check (committer_date is null or committer_date_neg_utc_offset is not null) not valid; -alter table revision + alter table revision validate constraint revision_date_neg_utc_offset_not_null; -alter table revision + alter table revision validate constraint revision_committer_date_neg_utc_offset_not_null; +\endif -create index concurrently on revision(directory); -create unique index concurrently on revision(object_id); +\if :dbflavor_default + create index concurrently on revision(directory); -- to be reviewed + create unique index concurrently on revision(object_id); -- to be reviewed +\endif -- revision_history create unique index concurrently revision_history_pkey on revision_history(id, parent_rank); alter table revision_history add primary key using index revision_history_pkey; -create index concurrently on revision_history(parent_id); +\if :dbflavor_default + create index concurrently on revision_history(parent_id); -- to be reviewed +\endif -alter table revision_history add constraint revision_history_id_fkey foreign key (id) references revision(id) not valid; -alter table revision_history validate constraint revision_history_id_fkey; +\if :dbflavor_does_deduplication + alter table revision_history add constraint revision_history_id_fkey foreign key (id) references revision(id) not valid; + alter table revision_history validate constraint revision_history_id_fkey; +\endif -- snapshot create unique index concurrently snapshot_pkey on snapshot(object_id); alter table snapshot add primary key using index snapshot_pkey; -create unique index concurrently on snapshot(id); +\if :dbflavor_does_deduplication + create unique index concurrently on snapshot(id); +\else + create index concurrently on snapshot(id); +\endif -- snapshot_branch create unique index concurrently snapshot_branch_pkey on snapshot_branch(object_id); alter table snapshot_branch add primary key using index snapshot_branch_pkey; -create unique index concurrently on snapshot_branch (target_type, target, name); -alter table snapshot_branch add constraint snapshot_branch_target_check check ((target_type is null) = (target is null)) not valid; -alter table snapshot_branch validate constraint snapshot_branch_target_check; -alter table snapshot_branch add constraint snapshot_target_check check (target_type not in ('content', 'directory', 'revision', 'release', 'snapshot') or length(target) = 20) not valid; -alter table snapshot_branch validate constraint snapshot_target_check; +\if :dbflavor_does_deduplication + create unique index concurrently on snapshot_branch (target_type, target, name); + alter table snapshot_branch add constraint snapshot_branch_target_check check ((target_type is null) = (target is null)) not valid; + alter table snapshot_branch validate constraint snapshot_branch_target_check; + alter table snapshot_branch add constraint snapshot_target_check check (target_type not in ('content', 'directory', 'revision', 'release', 'snapshot') or length(target) = 20) not valid; + alter table snapshot_branch validate constraint snapshot_target_check; -create unique index concurrently on snapshot_branch (name) where target_type is null and target is null; + create unique index concurrently on snapshot_branch (name) where target_type is null and target is null; +\endif -- snapshot_branches create unique index concurrently snapshot_branches_pkey on snapshot_branches(snapshot_id, branch_id); alter table snapshot_branches add primary key using index snapshot_branches_pkey; -alter table snapshot_branches add constraint snapshot_branches_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid; -alter table snapshot_branches validate constraint snapshot_branches_snapshot_id_fkey; +\if :dbflavor_does_deduplication + alter table snapshot_branches add constraint snapshot_branches_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid; + alter table snapshot_branches validate constraint snapshot_branches_snapshot_id_fkey; -alter table snapshot_branches add constraint snapshot_branches_branch_id_fkey foreign key (branch_id) references snapshot_branch(object_id) not valid; -alter table snapshot_branches validate constraint snapshot_branches_branch_id_fkey; + alter table snapshot_branches add constraint snapshot_branches_branch_id_fkey foreign key (branch_id) references snapshot_branch(object_id) not valid; + alter table snapshot_branches validate constraint snapshot_branches_branch_id_fkey; +\endif -- origin_visit create unique index concurrently origin_visit_pkey on origin_visit(origin, visit); alter table origin_visit add primary key using index origin_visit_pkey; -create index concurrently on origin_visit(date); -create index concurrently origin_visit_type_date on origin_visit(type, date); +\if :dbflavor_default + create index concurrently on origin_visit(date); -- to be reviewed + create index concurrently origin_visit_type_date on origin_visit(type, date); -- to be reviewed +\endif -alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid; -alter table origin_visit validate constraint origin_visit_origin_fkey; +\if :dbflavor_does_deduplication + alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid; + alter table origin_visit validate constraint origin_visit_origin_fkey; +\endif -- origin_visit_status create unique index concurrently origin_visit_status_pkey on origin_visit_status(origin, visit, date); alter table origin_visit_status add primary key using index origin_visit_status_pkey; -alter table origin_visit_status - add constraint origin_visit_status_origin_visit_fkey - foreign key (origin, visit) - references origin_visit(origin, visit) not valid; -alter table origin_visit_status validate constraint origin_visit_status_origin_visit_fkey; +\if :dbflavor_default + alter table origin_visit_status + add constraint origin_visit_status_origin_visit_fkey + foreign key (origin, visit) + references origin_visit(origin, visit) not valid; + alter table origin_visit_status validate constraint origin_visit_status_origin_visit_fkey; +\endif -- release create unique index concurrently release_pkey on release(id); alter table release add primary key using index release_pkey; -create index concurrently on release(target, target_type); -create unique index concurrently on release(object_id); +\if :dbflavor_default + create index concurrently on release(target, target_type); -- to be reviewed + create unique index concurrently on release(object_id); -- to be reviewed +\endif -alter table release add constraint release_author_fkey foreign key (author) references person(id) not valid; -alter table release validate constraint release_author_fkey; +\if :dbflavor_does_deduplication + alter table release add constraint release_author_fkey foreign key (author) references person(id) not valid; + alter table release validate constraint release_author_fkey; -alter table release + alter table release add constraint release_date_neg_utc_offset_not_null check (date is null or date_neg_utc_offset is not null) not valid; -alter table release + alter table release validate constraint release_date_neg_utc_offset_not_null; --- if the author is null, then the date must be null -alter table release add constraint release_author_date_check check ((date is null) or (author is not null)) not valid; -alter table release validate constraint release_author_date_check; + -- if the author is null, then the date must be null + alter table release add constraint release_author_date_check check ((date is null) or (author is not null)) not valid; + alter table release validate constraint release_author_date_check; +\endif -- metadata_fetcher create unique index metadata_fetcher_pkey on metadata_fetcher(id); alter table metadata_fetcher add primary key using index metadata_fetcher_pkey; -create unique index metadata_fetcher_name_version on metadata_fetcher(name, version); +\if :dbflavor_does_deduplication + create unique index metadata_fetcher_name_version on metadata_fetcher(name, version); +\else + create index metadata_fetcher_name_version on metadata_fetcher(name, version); +\endif -- metadata_authority create unique index concurrently metadata_authority_pkey on metadata_authority(id); alter table metadata_authority add primary key using index metadata_authority_pkey; -create unique index metadata_authority_type_url on metadata_authority(type, url); +\if :dbflavor_does_deduplication + create unique index concurrently metadata_authority_type_url on metadata_authority(type, url); +\else + create index concurrently metadata_authority_type_url on metadata_authority(type, url); +\endif + -- raw_extrinsic_metadata create unique index concurrently raw_extrinsic_metadata_content_authority_date_fetcher on raw_extrinsic_metadata(id, authority_id, discovery_date, fetcher_id); -alter table raw_extrinsic_metadata add constraint raw_extrinsic_metadata_authority_fkey foreign key (authority_id) references metadata_authority(id) not valid; -alter table raw_extrinsic_metadata validate constraint raw_extrinsic_metadata_authority_fkey; +\if :dbflavor_default + alter table raw_extrinsic_metadata add constraint raw_extrinsic_metadata_authority_fkey foreign key (authority_id) references metadata_authority(id) not valid; + alter table raw_extrinsic_metadata validate constraint raw_extrinsic_metadata_authority_fkey; -alter table raw_extrinsic_metadata add constraint raw_extrinsic_metadata_fetcher_fkey foreign key (fetcher_id) references metadata_fetcher(id) not valid; -alter table raw_extrinsic_metadata validate constraint raw_extrinsic_metadata_fetcher_fkey; + alter table raw_extrinsic_metadata add constraint raw_extrinsic_metadata_fetcher_fkey foreign key (fetcher_id) references metadata_fetcher(id) not valid; + alter table raw_extrinsic_metadata validate constraint raw_extrinsic_metadata_fetcher_fkey; +\endif -- object_counts create unique index concurrently object_counts_pkey on object_counts(object_type); diff --git a/swh/storage/sql/logical_replication/replication_source.sql b/swh/storage/sql/logical_replication/replication_source.sql new file mode 100644 --- /dev/null +++ b/swh/storage/sql/logical_replication/replication_source.sql @@ -0,0 +1,25 @@ +-- This file contains the instructions to create a replication source for +-- PostgreSQL logical replication to another database. + +CREATE PUBLICATION softwareheritage; + +ALTER PUBLICATION softwareheritage ADD TABLE content; +ALTER PUBLICATION softwareheritage ADD TABLE skipped_content; +ALTER PUBLICATION softwareheritage ADD TABLE directory; +ALTER PUBLICATION softwareheritage ADD TABLE directory_entry_file; +ALTER PUBLICATION softwareheritage ADD TABLE directory_entry_dir; +ALTER PUBLICATION softwareheritage ADD TABLE directory_entry_rev; +ALTER PUBLICATION softwareheritage ADD TABLE person; +ALTER PUBLICATION softwareheritage ADD TABLE revision; +ALTER PUBLICATION softwareheritage ADD TABLE revision_history; +ALTER PUBLICATION softwareheritage ADD TABLE release; +ALTER PUBLICATION softwareheritage ADD TABLE snapshot; +ALTER PUBLICATION softwareheritage ADD TABLE snapshot_branch; +ALTER PUBLICATION softwareheritage ADD TABLE snapshot_branches; +ALTER PUBLICATION softwareheritage ADD TABLE origin; +ALTER PUBLICATION softwareheritage ADD TABLE origin_visit; +ALTER PUBLICATION softwareheritage ADD TABLE origin_visit_status; +ALTER PUBLICATION softwareheritage ADD TABLE metadata_fetcher; +ALTER PUBLICATION softwareheritage ADD TABLE metadata_authority; +ALTER PUBLICATION softwareheritage ADD TABLE raw_extrinsic_metadata; +ALTER PUBLICATION softwareheritage ADD TABLE object_counts;