Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8394332
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
26 KB
Subscribers
None
View Options
diff --git a/sql/upgrades/003.sql b/sql/upgrades/003.sql
index ab706b3..06a2980 100644
--- a/sql/upgrades/003.sql
+++ b/sql/upgrades/003.sql
@@ -1,16 +1,121 @@
-- SWH DB schema upgrade
-- from_version: 2
-- to_version: 3
-- description: keep unique indices for origins and locations in a hash column
insert into dbversion(version, release, description)
values(3, now(), 'Work In Progress');
alter table location
drop constraint if exists location_path_key;
create unique index on location(digest(path, 'sha1'));
alter table origin
drop constraint if exists origin_url_key;
+
+
+-- psql variables to get the current database flavor
+select position('denormalized' in swh_get_dbflavor()::text) = 0 as dbflavor_norm \gset
+select position('without-path' in swh_get_dbflavor()::text) = 0 as dbflavor_with_path \gset
+
+\if :dbflavor_with_path
+
+\if :dbflavor_norm
+--
+-- with path and normalized
+--
+
+create or replace function swh_provenance_relation_add_from_temp(
+ rel_table regclass, src_table regclass, dst_table regclass
+)
+ returns void
+ language plpgsql
+ volatile
+as $$
+ declare
+ select_fields text;
+ join_location text;
+ begin
+ if src_table in ('content'::regclass, 'directory'::regclass) then
+ select_fields := 'D.id, L.id';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
+ else
+ select_fields := 'D.id';
+ join_location := '';
+ end if;
+
+ execute format(
+ 'insert into %s
+ select S.id, ' || select_fields || '
+ from tmp_relation_add as V
+ inner join %s as S on (S.sha1 = V.src)
+ inner join %s as D on (D.sha1 = V.dst)
+ ' || join_location || '
+ on conflict do nothing',
+ rel_table, src_table, dst_table
+ );
+ end;
+$$;
+
+\else
+--
+-- with path and denormalized
+--
+
+create or replace function swh_provenance_relation_add_from_temp(
+ rel_table regclass, src_table regclass, dst_table regclass
+)
+ returns void
+ language plpgsql
+ volatile
+as $$
+ declare
+ select_fields text;
+ join_location text;
+ group_entries text;
+ on_conflict text;
+ begin
+ if src_table in ('content'::regclass, 'directory'::regclass) then
+ select_fields := 'array_agg(D.id), array_agg(L.id)';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
+ group_entries := 'group by S.id';
+ on_conflict := format('
+ (%s) do update
+ set (%s, location) = (
+ with pairs as (
+ select distinct * from unnest(
+ %s.' || dst_table::text || ' || excluded.' || dst_table::text || ',
+ %s.location || excluded.location
+ ) as pair(dst, loc)
+ )
+ select array(select pairs.dst from pairs), array(select pairs.loc from pairs)
+ )',
+ src_table, dst_table, rel_table, rel_table, rel_table, rel_table
+ );
+ else
+ select_fields := 'D.id';
+ join_location := '';
+ group_entries := '';
+ on_conflict := 'do nothing';
+ end if;
+
+ execute format(
+ 'insert into %s
+ select S.id, ' || select_fields || '
+ from tmp_relation_add as V
+ inner join %s as S on (S.sha1 = V.src)
+ inner join %s as D on (D.sha1 = V.dst)
+ ' || join_location || '
+ ' || group_entries || '
+ on conflict ' || on_conflict,
+ rel_table, src_table, dst_table
+ );
+ end;
+$$;
+
+\endif
+-- :dbflavor_norm
+\endif
+-- :dbflavor_with_path
diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql
index 8ecd884..8ef93e9 100644
--- a/swh/provenance/sql/40-funcs.sql
+++ b/swh/provenance/sql/40-funcs.sql
@@ -1,709 +1,709 @@
-- psql variables to get the current database flavor
select position('denormalized' in swh_get_dbflavor()::text) = 0 as dbflavor_norm \gset
select position('with-path' in swh_get_dbflavor()::text) != 0 as dbflavor_with_path \gset
create or replace function swh_mktemp_relation_add() returns void
language sql
as $$
create temp table tmp_relation_add (
src sha1_git not null,
dst sha1_git not null,
path unix_path
) on commit drop
$$;
\if :dbflavor_norm
\if :dbflavor_with_path
--
-- with path and normalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join location as L on (L.id = CR.location)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id
order by date, revision, origin, path asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join location as L on (L.id = CR.location)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
union
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
case DL.path
when '' then CL.path
when '.' then CL.path
else (DL.path || '/' || CL.path)::unix_path
end as path
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
inner join directory_in_revision as DR on (DR.directory = CD.directory)
inner join revision as R on (R.id = DR.revision)
inner join location as CL on (CL.id = CD.location)
inner join location as DL on (DL.id = DR.location)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
declare
select_fields text;
join_location text;
begin
if src_table in ('content'::regclass, 'directory'::regclass) then
select_fields := 'D.id, L.id';
- join_location := 'inner join location as L on (L.path = V.path)';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
else
select_fields := 'D.id';
join_location := '';
end if;
execute format(
'insert into %s
select S.id, ' || select_fields || '
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
' || join_location || '
on conflict do nothing',
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
join_location text;
proj_location text;
filter_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
if src_table in ('content'::regclass, 'directory'::regclass) then
join_location := 'inner join location as L on (L.id = R.location)';
proj_location := 'L.path';
else
join_location := '';
proj_location := 'NULL::unix_path';
end if;
case filter
when 'filter-src'::rel_flt then
filter_result := 'where S.sha1 = any($1)';
when 'filter-dst'::rel_flt then
filter_result := 'where D.sha1 = any($1)';
else
filter_result := '';
end case;
return query execute format(
'select S.sha1 as src, D.sha1 as dst, ' || proj_location || ' as path
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
inner join %s as D on (D.id = R.' || dst_field || ')
' || join_location || '
' || filter_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
\else
--
-- without path and normalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id
order by date, revision, origin asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
union
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
inner join directory_in_revision as DR on (DR.directory = CD.directory)
inner join revision as R on (R.id = DR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
begin
execute format(
'insert into %s
select S.id, D.id
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
on conflict do nothing',
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
filter_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
case filter
when 'filter-src'::rel_flt then
filter_result := 'where S.sha1 = any($1)';
when 'filter-dst'::rel_flt then
filter_result := 'where D.sha1 = any($1)';
else
filter_result := '';
end case;
return query execute format(
'select S.sha1 as src, D.sha1 as dst, NULL::unix_path as path
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
inner join %s as D on (D.id = R.' || dst_field || ')
' || filter_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
-- :dbflavor_with_path
\endif
-- :dbflavor_norm
\else
\if :dbflavor_with_path
--
-- with path and denormalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from (
select C.sha1 as sha1,
unnest(CR.revision) as revision,
unnest(CR.location) as location
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1 = content_id
) as CL
inner join revision as R on (R.id = CL.revision)
inner join location as L on (L.id = CL.location)
left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(with
cntrev as (
select C.sha1 as sha1,
unnest(CR.revision) as revision,
unnest(CR.location) as location
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1 = content_id)
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
inner join location as L on (L.id = CR.location)
left join origin as O on (O.id = R.origin))
union
(with
cntdir as (
select C.sha1 as sha1,
unnest(CD.directory) as directory,
unnest(CD.location) as location
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
where C.sha1 = content_id),
cntrev as (
select CD.sha1 as sha1,
L.path as path,
unnest(DR.revision) as revision,
unnest(DR.location) as location
from cntdir as CD
inner join directory_in_revision as DR on (DR.directory = CD.directory)
inner join location as L on (L.id = CD.location))
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
case DL.path
when '' then CR.path
when '.' then CR.path
else (DL.path || '/' || CR.path)::unix_path
end as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
inner join location as DL on (DL.id = CR.location)
left join origin as O on (O.id = R.origin))
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
declare
select_fields text;
join_location text;
group_entries text;
on_conflict text;
begin
if src_table in ('content'::regclass, 'directory'::regclass) then
select_fields := 'array_agg(D.id), array_agg(L.id)';
- join_location := 'inner join location as L on (L.path = V.path)';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
group_entries := 'group by S.id';
on_conflict := format('
(%s) do update
set (%s, location) = (
with pairs as (
select distinct * from unnest(
%s.' || dst_table::text || ' || excluded.' || dst_table::text || ',
%s.location || excluded.location
) as pair(dst, loc)
)
select array(select pairs.dst from pairs), array(select pairs.loc from pairs)
)',
src_table, dst_table, rel_table, rel_table, rel_table, rel_table
);
else
select_fields := 'D.id';
join_location := '';
group_entries := '';
on_conflict := 'do nothing';
end if;
execute format(
'insert into %s
select S.id, ' || select_fields || '
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
' || join_location || '
' || group_entries || '
on conflict ' || on_conflict,
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
proj_dst_id text;
proj_unnested text;
proj_location text;
join_location text;
filter_inner_result text;
filter_outer_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
if src_table in ('content'::regclass, 'directory'::regclass) then
proj_unnested := 'unnest(R.' || dst_field || ') as dst, unnest(R.location) as loc';
proj_dst_id := 'CL.dst';
join_location := 'inner join location as L on (L.id = CL.loc)';
proj_location := 'L.path';
else
proj_unnested := 'R.' || dst_field || ' as dst';
proj_dst_id := 'CL.dst';
join_location := '';
proj_location := 'NULL::unix_path';
end if;
case filter
when 'filter-src'::rel_flt then
filter_inner_result := 'where S.sha1 = any($1)';
filter_outer_result := '';
when 'filter-dst'::rel_flt then
filter_inner_result := '';
filter_outer_result := 'where D.sha1 = any($1)';
else
filter_inner_result := '';
filter_outer_result := '';
end case;
return query execute format(
'select CL.src, D.sha1 as dst, ' || proj_location || ' as path
from (select S.sha1 as src, ' || proj_unnested || '
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
' || filter_inner_result || ') as CL
inner join %s as D on (D.id = ' || proj_dst_id || ')
' || join_location || '
' || filter_outer_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
\else
--
-- without path and denormalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from (
select C.sha1, unnest(revision) as revision
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1=content_id
) as CL
inner join revision as R on (R.id = CL.revision)
left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(with
cntrev as (
select C.sha1 as sha1,
unnest(CR.revision) as revision
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1 = content_id)
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin))
union
(with
cntdir as (
select C.sha1 as sha1,
unnest(CD.directory) as directory
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
where C.sha1 = content_id),
cntrev as (
select CD.sha1 as sha1,
unnest(DR.revision) as revision
from cntdir as CD
inner join directory_in_revision as DR on (DR.directory = CD.directory))
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin))
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
declare
select_fields text;
group_entries text;
on_conflict text;
begin
if src_table in ('content'::regclass, 'directory'::regclass) then
select_fields := 'array_agg(D.id)';
group_entries := 'group by S.id';
on_conflict := format('
(%s) do update
set %s=array(
select distinct unnest(
%s.' || dst_table::text || ' || excluded.' || dst_table::text || '
)
)',
src_table, dst_table, rel_table, rel_table
);
else
select_fields := 'D.id';
group_entries := '';
on_conflict := 'do nothing';
end if;
execute format(
'insert into %s
select S.id, ' || select_fields || '
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
' || group_entries || '
on conflict ' || on_conflict,
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
proj_unnested text;
filter_inner_result text;
filter_outer_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
if src_table in ('content'::regclass, 'directory'::regclass) then
proj_unnested := 'unnest(R.' || dst_field || ') as dst';
else
proj_unnested := 'R.' || dst_field || ' as dst';
end if;
case filter
when 'filter-src'::rel_flt then
filter_inner_result := 'where S.sha1 = any($1)';
filter_outer_result := '';
when 'filter-dst'::rel_flt then
filter_inner_result := '';
filter_outer_result := 'where D.sha1 = any($1)';
else
filter_inner_result := '';
filter_outer_result := '';
end case;
return query execute format(
'select CL.src, D.sha1 as dst, NULL::unix_path as path
from (select S.sha1 as src, ' || proj_unnested || '
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
' || filter_inner_result || ') as CL
inner join %s as D on (D.id = CL.dst)
' || filter_outer_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
\endif
-- :dbflavor_with_path
\endif
-- :dbflavor_norm
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Wed, Jun 4, 7:23 PM (1 w, 14 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3320096
Attached To
rDPROV Provenance database
Event Timeline
Log In to Comment