Page MenuHomeSoftware Heritage

D6812.id24687.diff
No OneTemporary

D6812.id24687.diff

diff --git a/sql/updates/32.sql b/sql/updates/32.sql
new file mode 100644
--- /dev/null
+++ b/sql/updates/32.sql
@@ -0,0 +1,57 @@
+-- SWH DB schema upgrade
+-- from_version: 31
+-- to_version: 32
+-- description: Use a temporary table to update scheduler_metrics
+
+insert into dbversion (version, release, description)
+ values (32, now(), 'Work In Progress');
+
+create or replace function update_metrics(lister_id uuid default NULL, ts timestamptz default now())
+ returns setof scheduler_metrics
+ language plpgsql
+as $$
+ begin
+ -- If we do the following select as a subquery in the insert statement below,
+ -- PostgreSQL prevents the use of parallel queries. So we do the select into a
+ -- temporary table, which doesn't suffer this limitation.
+
+ create temporary table tmp_update_metrics
+ on commit drop
+ as select
+ lo.lister_id, lo.visit_type, coalesce(ts, now()) as last_update,
+ count(*) as origins_known,
+ count(*) filter (where enabled) as origins_enabled,
+ count(*) filter (where
+ enabled and last_snapshot is NULL
+ ) as origins_never_visited,
+ count(*) filter (where
+ enabled and lo.last_update > last_successful
+ ) as origins_with_pending_changes
+ from listed_origins lo
+ left join origin_visit_stats ovs using (url, visit_type)
+ where
+ -- update only for the requested lister
+ update_metrics.lister_id = lo.lister_id
+ -- or for all listers if the function argument is null
+ or update_metrics.lister_id is null
+ group by (lo.lister_id, lo.visit_type);
+
+ return query
+ insert into scheduler_metrics (
+ lister_id, visit_type, last_update,
+ origins_known, origins_enabled,
+ origins_never_visited, origins_with_pending_changes
+ )
+ select * from tmp_update_metrics
+ on conflict on constraint scheduler_metrics_pkey do update
+ set
+ last_update = EXCLUDED.last_update,
+ origins_known = EXCLUDED.origins_known,
+ origins_enabled = EXCLUDED.origins_enabled,
+ origins_never_visited = EXCLUDED.origins_never_visited,
+ origins_with_pending_changes = EXCLUDED.origins_with_pending_changes
+ returning *;
+ end;
+$$;
+
+comment on function update_metrics(uuid, timestamptz) is 'Update metrics for the given lister_id';
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -11,7 +11,7 @@
comment on column dbversion.description is 'Version description';
insert into dbversion (version, release, description)
- values (31, now(), 'Work In Progress');
+ values (32, now(), 'Work In Progress');
create table task_type (
type text primary key,
diff --git a/swh/scheduler/sql/40-func.sql b/swh/scheduler/sql/40-func.sql
--- a/swh/scheduler/sql/40-func.sql
+++ b/swh/scheduler/sql/40-func.sql
@@ -363,14 +363,16 @@
create or replace function update_metrics(lister_id uuid default NULL, ts timestamptz default now())
returns setof scheduler_metrics
- language sql
+ language plpgsql
as $$
- insert into scheduler_metrics (
- lister_id, visit_type, last_update,
- origins_known, origins_enabled,
- origins_never_visited, origins_with_pending_changes
- )
- select
+ begin
+ -- If we do the following select as a subquery in the insert statement below,
+ -- PostgreSQL prevents the use of parallel queries. So we do the select into a
+ -- temporary table, which doesn't suffer this limitation.
+
+ create temporary table tmp_update_metrics
+ on commit drop
+ as select
lo.lister_id, lo.visit_type, coalesce(ts, now()) as last_update,
count(*) as origins_known,
count(*) filter (where enabled) as origins_enabled,
@@ -387,15 +389,24 @@
update_metrics.lister_id = lo.lister_id
-- or for all listers if the function argument is null
or update_metrics.lister_id is null
- group by (lister_id, visit_type)
- on conflict (lister_id, visit_type) do update
- set
- last_update = EXCLUDED.last_update,
- origins_known = EXCLUDED.origins_known,
- origins_enabled = EXCLUDED.origins_enabled,
- origins_never_visited = EXCLUDED.origins_never_visited,
- origins_with_pending_changes = EXCLUDED.origins_with_pending_changes
- returning *
+ group by (lo.lister_id, lo.visit_type);
+
+ return query
+ insert into scheduler_metrics (
+ lister_id, visit_type, last_update,
+ origins_known, origins_enabled,
+ origins_never_visited, origins_with_pending_changes
+ )
+ select * from tmp_update_metrics
+ on conflict on constraint scheduler_metrics_pkey do update
+ set
+ last_update = EXCLUDED.last_update,
+ origins_known = EXCLUDED.origins_known,
+ origins_enabled = EXCLUDED.origins_enabled,
+ origins_never_visited = EXCLUDED.origins_never_visited,
+ origins_with_pending_changes = EXCLUDED.origins_with_pending_changes
+ returning *;
+ end;
$$;
comment on function update_metrics(uuid, timestamptz) is 'Update metrics for the given lister_id';

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 8:12 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215612

Event Timeline