Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/sql/upgrades/003.sql b/sql/upgrades/003.sql
index ab706b3..06a2980 100644
--- a/sql/upgrades/003.sql
+++ b/sql/upgrades/003.sql
@@ -1,16 +1,121 @@
-- SWH DB schema upgrade
-- from_version: 2
-- to_version: 3
-- description: keep unique indices for origins and locations in a hash column
insert into dbversion(version, release, description)
values(3, now(), 'Work In Progress');
alter table location
drop constraint if exists location_path_key;
create unique index on location(digest(path, 'sha1'));
alter table origin
drop constraint if exists origin_url_key;
+
+
+-- psql variables to get the current database flavor
+select position('denormalized' in swh_get_dbflavor()::text) = 0 as dbflavor_norm \gset
+select position('without-path' in swh_get_dbflavor()::text) = 0 as dbflavor_with_path \gset
+
+\if :dbflavor_with_path
+
+\if :dbflavor_norm
+--
+-- with path and normalized
+--
+
+create or replace function swh_provenance_relation_add_from_temp(
+ rel_table regclass, src_table regclass, dst_table regclass
+)
+ returns void
+ language plpgsql
+ volatile
+as $$
+ declare
+ select_fields text;
+ join_location text;
+ begin
+ if src_table in ('content'::regclass, 'directory'::regclass) then
+ select_fields := 'D.id, L.id';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
+ else
+ select_fields := 'D.id';
+ join_location := '';
+ end if;
+
+ execute format(
+ 'insert into %s
+ select S.id, ' || select_fields || '
+ from tmp_relation_add as V
+ inner join %s as S on (S.sha1 = V.src)
+ inner join %s as D on (D.sha1 = V.dst)
+ ' || join_location || '
+ on conflict do nothing',
+ rel_table, src_table, dst_table
+ );
+ end;
+$$;
+
+\else
+--
+-- with path and denormalized
+--
+
+create or replace function swh_provenance_relation_add_from_temp(
+ rel_table regclass, src_table regclass, dst_table regclass
+)
+ returns void
+ language plpgsql
+ volatile
+as $$
+ declare
+ select_fields text;
+ join_location text;
+ group_entries text;
+ on_conflict text;
+ begin
+ if src_table in ('content'::regclass, 'directory'::regclass) then
+ select_fields := 'array_agg(D.id), array_agg(L.id)';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
+ group_entries := 'group by S.id';
+ on_conflict := format('
+ (%s) do update
+ set (%s, location) = (
+ with pairs as (
+ select distinct * from unnest(
+ %s.' || dst_table::text || ' || excluded.' || dst_table::text || ',
+ %s.location || excluded.location
+ ) as pair(dst, loc)
+ )
+ select array(select pairs.dst from pairs), array(select pairs.loc from pairs)
+ )',
+ src_table, dst_table, rel_table, rel_table, rel_table, rel_table
+ );
+ else
+ select_fields := 'D.id';
+ join_location := '';
+ group_entries := '';
+ on_conflict := 'do nothing';
+ end if;
+
+ execute format(
+ 'insert into %s
+ select S.id, ' || select_fields || '
+ from tmp_relation_add as V
+ inner join %s as S on (S.sha1 = V.src)
+ inner join %s as D on (D.sha1 = V.dst)
+ ' || join_location || '
+ ' || group_entries || '
+ on conflict ' || on_conflict,
+ rel_table, src_table, dst_table
+ );
+ end;
+$$;
+
+\endif
+-- :dbflavor_norm
+\endif
+-- :dbflavor_with_path
diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql
index 8ecd884..8ef93e9 100644
--- a/swh/provenance/sql/40-funcs.sql
+++ b/swh/provenance/sql/40-funcs.sql
@@ -1,709 +1,709 @@
-- psql variables to get the current database flavor
select position('denormalized' in swh_get_dbflavor()::text) = 0 as dbflavor_norm \gset
select position('with-path' in swh_get_dbflavor()::text) != 0 as dbflavor_with_path \gset
create or replace function swh_mktemp_relation_add() returns void
language sql
as $$
create temp table tmp_relation_add (
src sha1_git not null,
dst sha1_git not null,
path unix_path
) on commit drop
$$;
\if :dbflavor_norm
\if :dbflavor_with_path
--
-- with path and normalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join location as L on (L.id = CR.location)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id
order by date, revision, origin, path asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join location as L on (L.id = CR.location)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
union
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
case DL.path
when '' then CL.path
when '.' then CL.path
else (DL.path || '/' || CL.path)::unix_path
end as path
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
inner join directory_in_revision as DR on (DR.directory = CD.directory)
inner join revision as R on (R.id = DR.revision)
inner join location as CL on (CL.id = CD.location)
inner join location as DL on (DL.id = DR.location)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
declare
select_fields text;
join_location text;
begin
if src_table in ('content'::regclass, 'directory'::regclass) then
select_fields := 'D.id, L.id';
- join_location := 'inner join location as L on (L.path = V.path)';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
else
select_fields := 'D.id';
join_location := '';
end if;
execute format(
'insert into %s
select S.id, ' || select_fields || '
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
' || join_location || '
on conflict do nothing',
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
join_location text;
proj_location text;
filter_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
if src_table in ('content'::regclass, 'directory'::regclass) then
join_location := 'inner join location as L on (L.id = R.location)';
proj_location := 'L.path';
else
join_location := '';
proj_location := 'NULL::unix_path';
end if;
case filter
when 'filter-src'::rel_flt then
filter_result := 'where S.sha1 = any($1)';
when 'filter-dst'::rel_flt then
filter_result := 'where D.sha1 = any($1)';
else
filter_result := '';
end case;
return query execute format(
'select S.sha1 as src, D.sha1 as dst, ' || proj_location || ' as path
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
inner join %s as D on (D.id = R.' || dst_field || ')
' || join_location || '
' || filter_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
\else
--
-- without path and normalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id
order by date, revision, origin asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
union
(select C.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
inner join directory_in_revision as DR on (DR.directory = CD.directory)
inner join revision as R on (R.id = DR.revision)
left join origin as O on (O.id = R.origin)
where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
begin
execute format(
'insert into %s
select S.id, D.id
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
on conflict do nothing',
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
filter_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
case filter
when 'filter-src'::rel_flt then
filter_result := 'where S.sha1 = any($1)';
when 'filter-dst'::rel_flt then
filter_result := 'where D.sha1 = any($1)';
else
filter_result := '';
end case;
return query execute format(
'select S.sha1 as src, D.sha1 as dst, NULL::unix_path as path
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
inner join %s as D on (D.id = R.' || dst_field || ')
' || filter_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
-- :dbflavor_with_path
\endif
-- :dbflavor_norm
\else
\if :dbflavor_with_path
--
-- with path and denormalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from (
select C.sha1 as sha1,
unnest(CR.revision) as revision,
unnest(CR.location) as location
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1 = content_id
) as CL
inner join revision as R on (R.id = CL.revision)
inner join location as L on (L.id = CL.location)
left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(with
cntrev as (
select C.sha1 as sha1,
unnest(CR.revision) as revision,
unnest(CR.location) as location
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1 = content_id)
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
inner join location as L on (L.id = CR.location)
left join origin as O on (O.id = R.origin))
union
(with
cntdir as (
select C.sha1 as sha1,
unnest(CD.directory) as directory,
unnest(CD.location) as location
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
where C.sha1 = content_id),
cntrev as (
select CD.sha1 as sha1,
L.path as path,
unnest(DR.revision) as revision,
unnest(DR.location) as location
from cntdir as CD
inner join directory_in_revision as DR on (DR.directory = CD.directory)
inner join location as L on (L.id = CD.location))
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
case DL.path
when '' then CR.path
when '.' then CR.path
else (DL.path || '/' || CR.path)::unix_path
end as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
inner join location as DL on (DL.id = CR.location)
left join origin as O on (O.id = R.origin))
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
declare
select_fields text;
join_location text;
group_entries text;
on_conflict text;
begin
if src_table in ('content'::regclass, 'directory'::regclass) then
select_fields := 'array_agg(D.id), array_agg(L.id)';
- join_location := 'inner join location as L on (L.path = V.path)';
+ join_location := 'inner join location as L on (digest(L.path,''sha1'') = digest(V.path,''sha1''))';
group_entries := 'group by S.id';
on_conflict := format('
(%s) do update
set (%s, location) = (
with pairs as (
select distinct * from unnest(
%s.' || dst_table::text || ' || excluded.' || dst_table::text || ',
%s.location || excluded.location
) as pair(dst, loc)
)
select array(select pairs.dst from pairs), array(select pairs.loc from pairs)
)',
src_table, dst_table, rel_table, rel_table, rel_table, rel_table
);
else
select_fields := 'D.id';
join_location := '';
group_entries := '';
on_conflict := 'do nothing';
end if;
execute format(
'insert into %s
select S.id, ' || select_fields || '
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
' || join_location || '
' || group_entries || '
on conflict ' || on_conflict,
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
proj_dst_id text;
proj_unnested text;
proj_location text;
join_location text;
filter_inner_result text;
filter_outer_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
if src_table in ('content'::regclass, 'directory'::regclass) then
proj_unnested := 'unnest(R.' || dst_field || ') as dst, unnest(R.location) as loc';
proj_dst_id := 'CL.dst';
join_location := 'inner join location as L on (L.id = CL.loc)';
proj_location := 'L.path';
else
proj_unnested := 'R.' || dst_field || ' as dst';
proj_dst_id := 'CL.dst';
join_location := '';
proj_location := 'NULL::unix_path';
end if;
case filter
when 'filter-src'::rel_flt then
filter_inner_result := 'where S.sha1 = any($1)';
filter_outer_result := '';
when 'filter-dst'::rel_flt then
filter_inner_result := '';
filter_outer_result := 'where D.sha1 = any($1)';
else
filter_inner_result := '';
filter_outer_result := '';
end case;
return query execute format(
'select CL.src, D.sha1 as dst, ' || proj_location || ' as path
from (select S.sha1 as src, ' || proj_unnested || '
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
' || filter_inner_result || ') as CL
inner join %s as D on (D.id = ' || proj_dst_id || ')
' || join_location || '
' || filter_outer_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
\else
--
-- without path and denormalized
--
create or replace function swh_provenance_content_find_first(content_id sha1_git)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from (
select C.sha1, unnest(revision) as revision
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1=content_id
) as CL
inner join revision as R on (R.id = CL.revision)
left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int)
returns table (
content sha1_git,
revision sha1_git,
date timestamptz,
origin text,
path unix_path
)
language sql
stable
as $$
(with
cntrev as (
select C.sha1 as sha1,
unnest(CR.revision) as revision
from content_in_revision as CR
inner join content as C on (C.id = CR.content)
where C.sha1 = content_id)
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin))
union
(with
cntdir as (
select C.sha1 as sha1,
unnest(CD.directory) as directory
from content as C
inner join content_in_directory as CD on (CD.content = C.id)
where C.sha1 = content_id),
cntrev as (
select CD.sha1 as sha1,
unnest(DR.revision) as revision
from cntdir as CD
inner join directory_in_revision as DR on (DR.directory = CD.directory))
select CR.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from cntrev as CR
inner join revision as R on (R.id = CR.revision)
left join origin as O on (O.id = R.origin))
order by date, revision, origin, path limit early_cut
$$;
create or replace function swh_provenance_relation_add_from_temp(
rel_table regclass, src_table regclass, dst_table regclass
)
returns void
language plpgsql
volatile
as $$
declare
select_fields text;
group_entries text;
on_conflict text;
begin
if src_table in ('content'::regclass, 'directory'::regclass) then
select_fields := 'array_agg(D.id)';
group_entries := 'group by S.id';
on_conflict := format('
(%s) do update
set %s=array(
select distinct unnest(
%s.' || dst_table::text || ' || excluded.' || dst_table::text || '
)
)',
src_table, dst_table, rel_table, rel_table
);
else
select_fields := 'D.id';
group_entries := '';
on_conflict := 'do nothing';
end if;
execute format(
'insert into %s
select S.id, ' || select_fields || '
from tmp_relation_add as V
inner join %s as S on (S.sha1 = V.src)
inner join %s as D on (D.sha1 = V.dst)
' || group_entries || '
on conflict ' || on_conflict,
rel_table, src_table, dst_table
);
end;
$$;
create or replace function swh_provenance_relation_get(
rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[]
)
returns table (
src sha1_git,
dst sha1_git,
path unix_path
)
language plpgsql
stable
as $$
declare
src_field text;
dst_field text;
proj_unnested text;
filter_inner_result text;
filter_outer_result text;
begin
if rel_table = 'revision_before_revision'::regclass then
src_field := 'prev';
dst_field := 'next';
else
src_field := src_table::text;
dst_field := dst_table::text;
end if;
if src_table in ('content'::regclass, 'directory'::regclass) then
proj_unnested := 'unnest(R.' || dst_field || ') as dst';
else
proj_unnested := 'R.' || dst_field || ' as dst';
end if;
case filter
when 'filter-src'::rel_flt then
filter_inner_result := 'where S.sha1 = any($1)';
filter_outer_result := '';
when 'filter-dst'::rel_flt then
filter_inner_result := '';
filter_outer_result := 'where D.sha1 = any($1)';
else
filter_inner_result := '';
filter_outer_result := '';
end case;
return query execute format(
'select CL.src, D.sha1 as dst, NULL::unix_path as path
from (select S.sha1 as src, ' || proj_unnested || '
from %s as R
inner join %s as S on (S.id = R.' || src_field || ')
' || filter_inner_result || ') as CL
inner join %s as D on (D.id = CL.dst)
' || filter_outer_result,
rel_table, src_table, dst_table
) using sha1s;
end;
$$;
\endif
-- :dbflavor_with_path
\endif
-- :dbflavor_norm

File Metadata

Mime Type
text/x-diff
Expires
Wed, Jun 4, 7:23 PM (1 w, 14 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3320096

Event Timeline