Page MenuHomeSoftware Heritage

D6921.id25078.diff
No OneTemporary

D6921.id25078.diff

diff --git a/sql/updates/33.sql b/sql/updates/33.sql
new file mode 100644
--- /dev/null
+++ b/sql/updates/33.sql
@@ -0,0 +1,145 @@
+-- SWH DB schema upgrade
+-- from_version: 32
+-- to_version: 33
+-- description: Archive old task/task_run tasks
+
+insert into dbversion (version, release, description)
+ values (33, now(), 'Work In Progress');
+
+--------------------
+-- Schema adaptation
+--------------------
+
+-- This takes care of copying everything that matters from the old table to the new
+-- (sequences, comments...). Keeping what could slow down the copy on the side (indexes,
+-- constraints). Those will be installed at the end of the migration.
+
+create table new_task (
+ like task
+ including all
+ excluding indexes
+ excluding constraints
+);
+
+create table new_task_run (
+ like task_run
+ including all
+ excluding indexes
+ excluding constraints
+);
+
+------------------------
+-- Actual data migration
+------------------------
+
+insert into new_task(id, type, arguments, next_run, current_interval, status, policy,
+ retries_left, priority)
+select id, type, arguments, next_run, current_interval, status, policy,
+ retries_left, priority
+from task
+where (
+ policy='recurring' and (type = 'load-nixguix' or
+ type like 'list-%' or
+ type like 'index-%')
+ ) or (
+ policy = 'oneshot' and next_run > now() - interval '2 months'
+ );
+
+-- Keep only the necessary task runs (reusing the previous filtered task entries to
+-- create a consistency dataset faster)
+insert into new_task_run
+select id, task, backend_id, scheduled, started, ended, metadata, status
+from task_run where task in (
+ select distinct id from new_task
+);
+
+select last_value from task_id_seq;
+select last_value from task_run_id_seq;
+
+-----------
+-- Renaming
+-----------
+
+-- Rename current tables to archive_ prefixed names tables
+alter table task rename to archive_task;
+alter table task_run rename to archive_task_run;
+
+-- Rename new tables to standard tables
+alter table new_task rename to task;
+alter table new_task_run rename to task_run;
+
+-----------------------------
+-- Check sequence consistency
+-----------------------------
+
+select last_value from task_id_seq;
+select last_value from task_run_id_seq;
+
+--------------
+-- PKs and FKs
+--------------
+
+alter table task
+add primary key(id);
+
+alter table task
+alter column type set not null;
+
+alter table task
+add constraint task_type_fk
+foreign key (type) references task_type (type);
+
+alter table task
+add constraint task_priority_fk
+foreign key (priority) references priority_ratio (id);
+
+alter table task
+add constraint task_check_policy
+check (policy <> 'recurring' or current_interval is not null)
+not valid;
+
+alter table task
+ validate constraint task_check_policy;
+
+alter table task_run
+add primary key(id);
+
+alter table task_run
+alter column status set not null,
+alter column status set default 'scheduled';
+
+alter table task_run
+alter column task set not null,
+add constraint task_id_fk
+foreign key (task) references task (id);
+
+----------
+-- Indexes
+----------
+
+create index on task(type);
+create index on task(next_run);
+
+create index on task using btree(type, md5(arguments::text));
+create index on task(priority);
+
+create index on task_run(task);
+create index on task_run(backend_id);
+
+create index on task_run(task asc, started asc);
+
+create index on task(type, next_run)
+where status = 'next_run_not_scheduled'::task_status;
+
+----------
+-- Trigger
+----------
+
+-- Drop trigger on archive table
+drop trigger update_task_on_task_end on archive_task_run;
+
+create trigger update_task_on_task_end
+ after update of status on task_run
+ for each row
+ when (new.status NOT IN ('scheduled', 'started'))
+ execute procedure swh_scheduler_update_task_on_task_end ();
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -11,7 +11,7 @@
comment on column dbversion.description is 'Version description';
insert into dbversion (version, release, description)
- values (32, now(), 'Work In Progress');
+ values (33, now(), 'Work In Progress');
create table task_type (
type text primary key,

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:51 AM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224831

Event Timeline