diff --git a/sql/updates/14.sql b/sql/updates/14.sql new file mode 100644 --- /dev/null +++ b/sql/updates/14.sql @@ -0,0 +1,50 @@ +insert into dbversion (version, release, description) + values (14, now(), 'Work In Progress'); + +drop index task_args; +drop index task_kwargs; + +create index on task using btree(type, md5(arguments::text)); + +create or replace function swh_scheduler_create_tasks_from_temp () + returns setof task + language plpgsql +as $$ +begin + -- update the default values in one go + -- this is separated from the insert/select to avoid too much + -- juggling + update tmp_task t + set current_interval = tt.default_interval, + retries_left = coalesce(retries_left, tt.num_retries, 0) + from task_type tt + where tt.type=t.type; + + insert into task (type, arguments, next_run, status, current_interval, policy, + retries_left, priority) + select type, arguments, next_run, status, current_interval, policy, + retries_left, priority + from tmp_task t + where not exists(select 1 + from task + where type = t.type and + md5(arguments::text) = md5(t.arguments::text) and + arguments = t.arguments and + policy = t.policy and + priority is not distinct from t.priority and + status = t.status); + + return query + select distinct t.* + from tmp_task tt inner join task t on ( + tt.type = t.type and + md5(tt.arguments::text) = md5(t.arguments::text) and + tt.arguments = t.arguments and + tt.policy = t.policy and + tt.priority is not distinct from t.priority and + tt.status = t.status + ); +end; +$$; + +comment on function swh_scheduler_create_tasks_from_temp () is 'Create tasks in bulk from the temporary table'; diff --git a/swh/scheduler/sql/30-swh-schema.sql b/swh/scheduler/sql/30-swh-schema.sql --- a/swh/scheduler/sql/30-swh-schema.sql +++ b/swh/scheduler/sql/30-swh-schema.sql @@ -11,7 +11,7 @@ comment on column dbversion.description is 'Version description'; insert into dbversion (version, release, description) - values (13, now(), 'Work In Progress'); + values (14, now(), 'Work In Progress'); create table task_type ( type text primary key, diff --git a/swh/scheduler/sql/40-swh-func.sql b/swh/scheduler/sql/40-swh-func.sql --- a/swh/scheduler/sql/40-swh-func.sql +++ b/swh/scheduler/sql/40-swh-func.sql @@ -34,8 +34,8 @@ where not exists(select 1 from task where type = t.type and - arguments->'args' = t.arguments->'args' and - arguments->'kwargs' = t.arguments->'kwargs' and + md5(arguments::text) = md5(t.arguments::text) and + arguments = t.arguments and policy = t.policy and priority is not distinct from t.priority and status = t.status); @@ -44,8 +44,8 @@ select distinct t.* from tmp_task tt inner join task t on ( tt.type = t.type and - tt.arguments->'args' = t.arguments->'args' and - tt.arguments->'kwargs' = t.arguments->'kwargs' and + md5(tt.arguments::text) = md5(t.arguments::text) and + tt.arguments = t.arguments and tt.policy = t.policy and tt.priority is not distinct from t.priority and tt.status = t.status diff --git a/swh/scheduler/sql/60-swh-indexes.sql b/swh/scheduler/sql/60-swh-indexes.sql --- a/swh/scheduler/sql/60-swh-indexes.sql +++ b/swh/scheduler/sql/60-swh-indexes.sql @@ -1,7 +1,9 @@ create index on task(type); create index on task(next_run); -create index task_args on task using btree ((arguments -> 'args')); -create index task_kwargs on task using gin ((arguments -> 'kwargs')); + +-- used for quick equality checking +create index on task using btree(type, md5(arguments::text)); + create index on task(priority); create index on task_run(task);