Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124348
D6812.id24687.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D6812.id24687.diff
View Options
diff --git a/sql/updates/32.sql b/sql/updates/32.sql
new file mode 100644
--- /dev/null
+++ b/sql/updates/32.sql
@@ -0,0 +1,57 @@
+-- SWH DB schema upgrade
+-- from_version: 31
+-- to_version: 32
+-- description: Use a temporary table to update scheduler_metrics
+
+insert into dbversion (version, release, description)
+ values (32, now(), 'Work In Progress');
+
+create or replace function update_metrics(lister_id uuid default NULL, ts timestamptz default now())
+ returns setof scheduler_metrics
+ language plpgsql
+as $$
+ begin
+ -- If we do the following select as a subquery in the insert statement below,
+ -- PostgreSQL prevents the use of parallel queries. So we do the select into a
+ -- temporary table, which doesn't suffer this limitation.
+
+ create temporary table tmp_update_metrics
+ on commit drop
+ as select
+ lo.lister_id, lo.visit_type, coalesce(ts, now()) as last_update,
+ count(*) as origins_known,
+ count(*) filter (where enabled) as origins_enabled,
+ count(*) filter (where
+ enabled and last_snapshot is NULL
+ ) as origins_never_visited,
+ count(*) filter (where
+ enabled and lo.last_update > last_successful
+ ) as origins_with_pending_changes
+ from listed_origins lo
+ left join origin_visit_stats ovs using (url, visit_type)
+ where
+ -- update only for the requested lister
+ update_metrics.lister_id = lo.lister_id
+ -- or for all listers if the function argument is null
+ or update_metrics.lister_id is null
+ group by (lo.lister_id, lo.visit_type);
+
+ return query
+ insert into scheduler_metrics (
+ lister_id, visit_type, last_update,
+ origins_known, origins_enabled,
+ origins_never_visited, origins_with_pending_changes
+ )
+ select * from tmp_update_metrics
+ on conflict on constraint scheduler_metrics_pkey do update
+ set
+ last_update = EXCLUDED.last_update,
+ origins_known = EXCLUDED.origins_known,
+ origins_enabled = EXCLUDED.origins_enabled,
+ origins_never_visited = EXCLUDED.origins_never_visited,
+ origins_with_pending_changes = EXCLUDED.origins_with_pending_changes
+ returning *;
+ end;
+$$;
+
+comment on function update_metrics(uuid, timestamptz) is 'Update metrics for the given lister_id';
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -11,7 +11,7 @@
comment on column dbversion.description is 'Version description';
insert into dbversion (version, release, description)
- values (31, now(), 'Work In Progress');
+ values (32, now(), 'Work In Progress');
create table task_type (
type text primary key,
diff --git a/swh/scheduler/sql/40-func.sql b/swh/scheduler/sql/40-func.sql
--- a/swh/scheduler/sql/40-func.sql
+++ b/swh/scheduler/sql/40-func.sql
@@ -363,14 +363,16 @@
create or replace function update_metrics(lister_id uuid default NULL, ts timestamptz default now())
returns setof scheduler_metrics
- language sql
+ language plpgsql
as $$
- insert into scheduler_metrics (
- lister_id, visit_type, last_update,
- origins_known, origins_enabled,
- origins_never_visited, origins_with_pending_changes
- )
- select
+ begin
+ -- If we do the following select as a subquery in the insert statement below,
+ -- PostgreSQL prevents the use of parallel queries. So we do the select into a
+ -- temporary table, which doesn't suffer this limitation.
+
+ create temporary table tmp_update_metrics
+ on commit drop
+ as select
lo.lister_id, lo.visit_type, coalesce(ts, now()) as last_update,
count(*) as origins_known,
count(*) filter (where enabled) as origins_enabled,
@@ -387,15 +389,24 @@
update_metrics.lister_id = lo.lister_id
-- or for all listers if the function argument is null
or update_metrics.lister_id is null
- group by (lister_id, visit_type)
- on conflict (lister_id, visit_type) do update
- set
- last_update = EXCLUDED.last_update,
- origins_known = EXCLUDED.origins_known,
- origins_enabled = EXCLUDED.origins_enabled,
- origins_never_visited = EXCLUDED.origins_never_visited,
- origins_with_pending_changes = EXCLUDED.origins_with_pending_changes
- returning *
+ group by (lo.lister_id, lo.visit_type);
+
+ return query
+ insert into scheduler_metrics (
+ lister_id, visit_type, last_update,
+ origins_known, origins_enabled,
+ origins_never_visited, origins_with_pending_changes
+ )
+ select * from tmp_update_metrics
+ on conflict on constraint scheduler_metrics_pkey do update
+ set
+ last_update = EXCLUDED.last_update,
+ origins_known = EXCLUDED.origins_known,
+ origins_enabled = EXCLUDED.origins_enabled,
+ origins_never_visited = EXCLUDED.origins_never_visited,
+ origins_with_pending_changes = EXCLUDED.origins_with_pending_changes
+ returning *;
+ end;
$$;
comment on function update_metrics(uuid, timestamptz) is 'Update metrics for the given lister_id';
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 8:12 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215612
Attached To
D6812: Use a temporary table to update scheduler metrics
Event Timeline
Log In to Comment