diff --git a/sql/upgrades/049.sql b/sql/upgrades/049.sql new file mode 100644 index 00000000..fdfd95bb --- /dev/null +++ b/sql/upgrades/049.sql @@ -0,0 +1,262 @@ +-- SWH DB schema upgrade +-- from_version: 48 +-- to_version: 49 +-- description: update the schemata for occurrence and occurrence_history + +insert into dbversion(version, release, description) + values(49, now(), 'Work In Progress'); + +CREATE TABLE origin_visit ( + origin bigint NOT NULL, + visit bigint NOT NULL, + "date" timestamp with time zone NOT NULL +); + +-- move occurrence_history to another table +alter table occurrence_history rename to old_occurrence_history; +alter index occurrence_history_pkey rename to old_occurrence_history_pkey; +alter index occurrence_history_origin_branch_idx rename to old_occurrence_history_origin_branch_idx; +alter index occurrence_history_target_target_type_idx rename to old_occurrence_history_target_target_type_idx; +alter table old_occurrence_history + rename constraint occurrence_history_authority_fkey to old_occurrence_history_authority_fkey; +alter table old_occurrence_history + rename constraint occurrence_history_origin_fkey to old_occurrence_history_origin_fkey; + +create table occurrence_history +( + origin bigint, + branch bytea, -- e.g., b"master" (for VCS), or b"sid" (for Debian) + target sha1_git, -- ref target, e.g., commit id + target_type object_type, -- ref target type + visits bigint[], + object_id bigserial -- short object identifier +); + +-- create origin_visit contents +with origins_visited as ( + select distinct origin, lower(validity) as date + from old_occurrence_history + where authority = '5f4d4c51-498a-4e28-88b3-b3e4e8396cba' -- swh + order by origin, date +) + insert into origin_visit (origin, date, visit) + select origin, date, row_number() over (partition by origin, date) + from origins_visited; + +ALTER TABLE origin_visit + ADD CONSTRAINT origin_visit_pkey PRIMARY KEY (origin, visit); + +ALTER TABLE origin_visit + ADD CONSTRAINT origin_visit_origin_fkey FOREIGN KEY (origin) REFERENCES origin(id); + +CREATE INDEX origin_visit_date_idx ON origin_visit USING btree (date); + +-- create new occurrence_history contents +insert into occurrence_history (origin, branch, target, target_type, object_id, visits) + select ooh.origin, branch, target, target_type, object_id, array[visit] + from old_occurrence_history ooh + left join origin_visit ov on ov.origin = ooh.origin and ov.date = lower(ooh.validity); + +ALTER TABLE occurrence_history + ADD CONSTRAINT occurrence_history_pkey PRIMARY KEY (object_id), + ADD CONSTRAINT occurrence_history_origin_fkey FOREIGN KEY (origin) REFERENCES origin(id); + +CREATE INDEX on occurrence_history(target, target_type); +CREATE INDEX on occurrence_history(origin, branch); + +-- drop table old_occurrence_history; + +-- create new occurrence contents +alter table occurrence + drop constraint occurrence_pkey, + drop constraint occurrence_origin_fkey; + +drop index if exists occurrence_target_target_type_idx; + + +create or replace function update_occurrence(origin_id bigint) returns void language sql as $$ + insert into occurrence (origin, branch, target, target_type) + select origin, branch, target, target_type from occurrence_history + where origin = origin_id + and (select visit from origin_visit + where origin = origin_id + order by date desc + limit 1) = any(visits); +$$; + +with updates as (select update_occurrence(id) from origin) select null; + +drop function update_occurrence(bigint); + +ALTER TABLE occurrence + ADD CONSTRAINT occurrence_pkey PRIMARY KEY (origin, branch), + ADD CONSTRAINT occurrence_origin_fkey FOREIGN KEY (origin) REFERENCES origin(id); + +CREATE INDEX occurrence_target_target_type_idx on occurrence(target, target_type); + + +CREATE OR REPLACE FUNCTION swh_mktemp_occurrence_history() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_occurrence_history( + like occurrence_history including defaults, + date timestamptz not null + ) on commit drop; + alter table tmp_occurrence_history + drop column visits, + drop column object_id; +$$; + +DROP FUNCTION swh_occurrence_get_by(bigint,bytea,timestamp with time zone); +CREATE OR REPLACE FUNCTION swh_occurrence_get_by(origin_id bigint, branch_name bytea = NULL::bytea, "date" timestamp with time zone = NULL::timestamp with time zone) RETURNS SETOF occurrence_history + LANGUAGE plpgsql + AS $$ +declare + filters text[] := array[] :: text[]; -- AND-clauses used to filter content + visit_id bigint; + q text; +begin + if origin_id is not null then + filters := filters || format('origin = %L', origin_id); + end if; + if branch_name is not null then + filters := filters || format('branch = %L', branch_name); + end if; + if date is not null then + if origin_id is null then + raise exception 'Needs an origin_id to filter by date.'; + end if; + select visit from swh_visit_find_by_date(origin_id, date) into visit_id; + if visit_id is null then + return; + end if; + filters := filters || format('%L = any(visits)', visit_id); + end if; + + if cardinality(filters) = 0 then + raise exception 'At least one filter amongst (origin_id, branch_name, validity) is needed'; + else + q = format('select * ' || + 'from occurrence_history ' || + 'where %s', + array_to_string(filters, ' and ')); + return query execute q; + end if; +end +$$; + +CREATE OR REPLACE FUNCTION swh_occurrence_history_add() RETURNS void + LANGUAGE plpgsql + AS $$ +declare + origin_id origin.id%type; +begin + -- Create new visits + with current_visits as ( + select distinct origin, date from tmp_occurrence_history + ), + new_visits as ( + select origin, date, (select coalesce(max(visit), 0) + from origin_visit ov + where ov.origin = origin) + + row_number() + over(partition by origin + order by origin, date) + from current_visits cv + where not exists (select 1 from origin_visit ov + where ov.origin = cv.origin and + ov.date = cv.date) + ) + insert into origin_visit (origin, date, visit) + select * from new_visits; + + -- Create or update occurrence_history + with occurrence_history_id_visit as ( + select tmp_occurrence_history.*, object_id, visits, visit from tmp_occurrence_history + left join occurrence_history using(origin, target, target_type) + left join origin_visit using(origin, date) + ), + occurrences_to_update as ( + select object_id, visit from occurrence_history_id_visit where object_id is not null + ), + update_occurrences as ( + update occurrence_history + set visits = array(select unnest(occurrence_history.visits) as e + union + select occurrences_to_update.visit as e + order by e) + from occurrences_to_update + where occurrence_history.object_id = occurrences_to_update.object_id + ) + insert into occurrence_history (origin, branch, target, target_type, visits) + select origin, branch, target, target_type, ARRAY[visit] + from occurrence_history_id_visit + where object_id is null; + + -- update occurrence + for origin_id in + select distinct origin from tmp_occurrence_history + loop + delete from occurrence where origin = origin_id; + insert into occurrence (origin, branch, target, target_type) + select origin, branch, target, target_type + from occurrence_history + where origin = origin_id and + (select visit from origin_visit + where origin = origin_id + order by date desc + limit 1) = any(visits); + end loop; + + return; +end +$$; + +CREATE OR REPLACE FUNCTION swh_revision_find_occurrence(revision_id sha1_git) RETURNS occurrence + LANGUAGE sql STABLE + AS $$ + select origin, branch, target, target_type + from swh_revision_list_children(ARRAY[revision_id] :: bytea[]) as rev_list + left join occurrence_history occ_hist + on rev_list.id = occ_hist.target + where occ_hist.origin is not null and + occ_hist.target_type = 'revision' + limit 1; +$$; + +DROP FUNCTION swh_revision_get_by(bigint,bytea,timestamp with time zone); +CREATE OR REPLACE FUNCTION swh_revision_get_by(origin_id bigint, branch_name bytea = NULL::bytea, "date" timestamp with time zone = NULL::timestamp with time zone) RETURNS SETOF revision_entry + LANGUAGE sql STABLE + AS $$ + select r.id, r.date, r.date_offset, + r.committer_date, r.committer_date_offset, + r.type, r.directory, r.message, + a.name, a.email, c.name, c.email, r.metadata, r.synthetic, + array(select rh.parent_id::bytea + from revision_history rh + where rh.id = r.id + order by rh.parent_rank + ) as parents + from swh_occurrence_get_by(origin_id, branch_name, date) as occ + inner join revision r on occ.target = r.id + left join person a on a.id = r.author + left join person c on c.id = r.committer; +$$; + +CREATE OR REPLACE FUNCTION swh_visit_find_by_date(origin bigint, visit_date timestamp with time zone = now()) RETURNS origin_visit + LANGUAGE sql STABLE + AS $$ + with closest_two_visits as (( + select origin_visit, (date - visit_date) as interval + from origin_visit + where date >= visit_date + order by date asc + limit 1 + ) union ( + select origin_visit, (visit_date - date) as interval + from origin_visit + where date < visit_date + order by date desc + limit 1 + )) select (origin_visit).* from closest_two_visits order by interval limit 1 +$$;