Page MenuHomeSoftware Heritage

D2448.diff
No OneTemporary

D2448.diff

diff --git a/sql/updates/14.sql b/sql/updates/14.sql
new file mode 100644
--- /dev/null
+++ b/sql/updates/14.sql
@@ -0,0 +1,50 @@
+insert into dbversion (version, release, description)
+ values (14, now(), 'Work In Progress');
+
+drop index task_args;
+drop index task_kwargs;
+
+create index on task using btree(type, md5(arguments::text));
+
+create or replace function swh_scheduler_create_tasks_from_temp ()
+ returns setof task
+ language plpgsql
+as $$
+begin
+ -- update the default values in one go
+ -- this is separated from the insert/select to avoid too much
+ -- juggling
+ update tmp_task t
+ set current_interval = tt.default_interval,
+ retries_left = coalesce(retries_left, tt.num_retries, 0)
+ from task_type tt
+ where tt.type=t.type;
+
+ insert into task (type, arguments, next_run, status, current_interval, policy,
+ retries_left, priority)
+ select type, arguments, next_run, status, current_interval, policy,
+ retries_left, priority
+ from tmp_task t
+ where not exists(select 1
+ from task
+ where type = t.type and
+ md5(arguments::text) = md5(t.arguments::text) and
+ arguments = t.arguments and
+ policy = t.policy and
+ priority is not distinct from t.priority and
+ status = t.status);
+
+ return query
+ select distinct t.*
+ from tmp_task tt inner join task t on (
+ tt.type = t.type and
+ md5(tt.arguments::text) = md5(t.arguments::text) and
+ tt.arguments = t.arguments and
+ tt.policy = t.policy and
+ tt.priority is not distinct from t.priority and
+ tt.status = t.status
+ );
+end;
+$$;
+
+comment on function swh_scheduler_create_tasks_from_temp () is 'Create tasks in bulk from the temporary table';
diff --git a/swh/scheduler/sql/30-swh-schema.sql b/swh/scheduler/sql/30-swh-schema.sql
--- a/swh/scheduler/sql/30-swh-schema.sql
+++ b/swh/scheduler/sql/30-swh-schema.sql
@@ -11,7 +11,7 @@
comment on column dbversion.description is 'Version description';
insert into dbversion (version, release, description)
- values (13, now(), 'Work In Progress');
+ values (14, now(), 'Work In Progress');
create table task_type (
type text primary key,
diff --git a/swh/scheduler/sql/40-swh-func.sql b/swh/scheduler/sql/40-swh-func.sql
--- a/swh/scheduler/sql/40-swh-func.sql
+++ b/swh/scheduler/sql/40-swh-func.sql
@@ -34,8 +34,8 @@
where not exists(select 1
from task
where type = t.type and
- arguments->'args' = t.arguments->'args' and
- arguments->'kwargs' = t.arguments->'kwargs' and
+ md5(arguments::text) = md5(t.arguments::text) and
+ arguments = t.arguments and
policy = t.policy and
priority is not distinct from t.priority and
status = t.status);
@@ -44,8 +44,8 @@
select distinct t.*
from tmp_task tt inner join task t on (
tt.type = t.type and
- tt.arguments->'args' = t.arguments->'args' and
- tt.arguments->'kwargs' = t.arguments->'kwargs' and
+ md5(tt.arguments::text) = md5(t.arguments::text) and
+ tt.arguments = t.arguments and
tt.policy = t.policy and
tt.priority is not distinct from t.priority and
tt.status = t.status
diff --git a/swh/scheduler/sql/60-swh-indexes.sql b/swh/scheduler/sql/60-swh-indexes.sql
--- a/swh/scheduler/sql/60-swh-indexes.sql
+++ b/swh/scheduler/sql/60-swh-indexes.sql
@@ -1,7 +1,9 @@
create index on task(type);
create index on task(next_run);
-create index task_args on task using btree ((arguments -> 'args'));
-create index task_kwargs on task using gin ((arguments -> 'kwargs'));
+
+-- used for quick equality checking
+create index on task using btree(type, md5(arguments::text));
+
create index on task(priority);
create index on task_run(task);

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:20 PM (6 d, 11 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222689

Event Timeline