diff --git a/swh/provenance/sql/30-schema.sql b/swh/provenance/sql/30-schema.sql --- a/swh/provenance/sql/30-schema.sql +++ b/swh/provenance/sql/30-schema.sql @@ -24,13 +24,6 @@ -- UNIX path (absolute, relative, individual path component, etc.) create domain unix_path as bytea; --- a relation destination ID (used for denormalized flavors: with-path vs. without-path) -\if :dbflavor_with_path -create type rel_dst as (id bigint, loc bigint); -\else -create domain rel_dst as bigint; -\endif - -- relation filter options for querying create type rel_flt as enum ( 'filter-src', @@ -97,9 +90,10 @@ content bigint not null, -- internal identifier of the content blob \if :dbflavor_norm revision bigint not null, -- internal identifier of the revision where the blob appears for the first time - location bigint -- location of the content relative to the revision root directory + location bigint -- location of the content relative to the revision's root directory \else - revision rel_dst[] -- internal reference of the revision (and location) where the blob appears for the first time + revision bigint[], -- internal identifiers of the revisions where the blob appears for the first time + location bigint[] -- locations of the content relative to the revisions' root directory \endif -- foreign key (content) references content (id), -- foreign key (revision) references revision (id), @@ -120,7 +114,8 @@ directory bigint not null, -- internal identifier of the directory containing the blob location bigint -- location of the content relative to its parent directory in the isochrone frontier \else - directory rel_dst[] -- internal reference of the directory (and location) containing the blob + directory bigint[], -- internal reference of the directories containing the blob + location bigint[] -- locations of the content relative to its parent directories in the isochrone frontier \endif -- foreign key (content) references content (id), -- foreign key (directory) references directory (id), @@ -139,9 +134,10 @@ directory bigint not null, -- internal identifier of the directory appearing in the revision \if :dbflavor_norm revision bigint not null, -- internal identifier of the revision containing the directory - location bigint -- location of the directory relative to the revision root directory + location bigint -- location of the directory relative to the revision's root directory \else - revision rel_dst[] -- internal reference of the revision (and location) containing the directory + revision bigint[], -- internal identifiers of the revisions containing the directory + location bigint[] -- locations of the directory relative to the revisions' root directory \endif -- foreign key (directory) references directory (id), -- foreign key (revision) references revision (id), diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql --- a/swh/provenance/sql/40-funcs.sql +++ b/swh/provenance/sql/40-funcs.sql @@ -331,13 +331,14 @@ L.path as path from ( select C.sha1 as sha1, - unnest(CR.revision) as revision + unnest(CR.revision) as revision, + unnest(CR.location) as location from content_in_revision as CR inner join content as C on (C.id = CR.content) where C.sha1 = content_id ) as CL - inner join revision as R on (R.id = (CL.revision).id) - inner join location as L on (L.id = (CL.revision).loc) + inner join revision as R on (R.id = CL.revision) + inner join location as L on (L.id = CL.location) left join origin as O on (O.id = R.origin) order by date, revision, origin, path asc limit 1 $$; @@ -356,7 +357,8 @@ (with cntrev as ( select C.sha1 as sha1, - unnest(CR.revision) as revision + unnest(CR.revision) as revision, + unnest(CR.location) as location from content_in_revision as CR inner join content as C on (C.id = CR.content) where C.sha1 = content_id) @@ -366,24 +368,26 @@ O.url as origin, L.path as path from cntrev as CR - inner join revision as R on (R.id = (CR.revision).id) - inner join location as L on (L.id = (CR.revision).loc) + inner join revision as R on (R.id = CR.revision) + inner join location as L on (L.id = CR.location) left join origin as O on (O.id = R.origin)) union (with cntdir as ( select C.sha1 as sha1, - unnest(CD.directory) as directory + unnest(CD.directory) as directory, + unnest(CD.location) as location from content as C inner join content_in_directory as CD on (CD.content = C.id) where C.sha1 = content_id), cntrev as ( select CD.sha1 as sha1, L.path as path, - unnest(DR.revision) as revision + unnest(DR.revision) as revision, + unnest(DR.location) as location from cntdir as CD - inner join directory_in_revision as DR on (DR.directory = (CD.directory).id) - inner join location as L on (L.id = (CD.directory).loc)) + inner join directory_in_revision as DR on (DR.directory = CD.directory) + inner join location as L on (L.id = CD.location)) select CR.sha1 as content, R.sha1 as revision, R.date as date, @@ -394,8 +398,8 @@ else (DL.path || '/' || CR.path)::unix_path end as path from cntrev as CR - inner join revision as R on (R.id = (CR.revision).id) - inner join location as DL on (DL.id = (CR.revision).loc) + inner join revision as R on (R.id = CR.revision) + inner join location as DL on (DL.id = CR.location) left join origin as O on (O.id = R.origin)) order by date, revision, origin, path limit early_cut $$; @@ -414,17 +418,21 @@ on_conflict text; begin if src_table in ('content'::regclass, 'directory'::regclass) then - select_fields := 'array_agg((D.id, L.id)::rel_dst)'; + select_fields := 'array_agg(D.id), array_agg(L.id)'; join_location := 'inner join location as L on (L.path = V.path)'; group_entries := 'group by S.id'; on_conflict := format(' (%s) do update - set %s=array( - select distinct unnest( - %s.' || dst_table::text || ' || excluded.' || dst_table::text || ' - ) + set (%s, location) = ( + with pairs as ( + select distinct * from unnest( + %s.' || dst_table::text || ' || excluded.' || dst_table::text || ', + %s.location || excluded.location + ) as pair(dst, loc) + ) + select array(select pairs.dst from pairs), array(select pairs.loc from pairs) )', - src_table, dst_table, rel_table, rel_table, rel_table + src_table, dst_table, rel_table, rel_table, rel_table, rel_table ); else select_fields := 'D.id'; @@ -477,9 +485,9 @@ end if; if src_table in ('content'::regclass, 'directory'::regclass) then - proj_unnested := 'unnest(R.' || dst_field || ') as dst'; - proj_dst_id := '(CL.dst).id'; - join_location := 'inner join location as L on (L.id = (CL.dst).loc)'; + proj_unnested := 'unnest(R.' || dst_field || ') as dst, unnest(R.location) as loc'; + proj_dst_id := 'CL.dst'; + join_location := 'inner join location as L on (L.id = CL.loc)'; proj_location := 'L.path'; else proj_unnested := 'R.' || dst_field || ' as dst';