Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/sql/50-func.sql
Show All 34 Lines | |||||
as $$ | as $$ | ||||
create temporary table if not exists tmp_content_mimetype ( | create temporary table if not exists tmp_content_mimetype ( | ||||
like content_mimetype including defaults | like content_mimetype including defaults | ||||
) on commit delete rows; | ) on commit delete rows; | ||||
$$; | $$; | ||||
comment on function swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information'; | comment on function swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information'; | ||||
-- add tmp_content_mimetype entries to content_mimetype, overwriting | -- add tmp_content_mimetype entries to content_mimetype, overwriting duplicates. | ||||
-- duplicates if conflict_update is true, skipping duplicates otherwise. | |||||
-- | -- | ||||
-- If filtering duplicates is in order, the call to | -- If filtering duplicates is in order, the call to | ||||
-- swh_content_mimetype_missing must take place before calling this | -- swh_content_mimetype_missing must take place before calling this | ||||
-- function. | -- function. | ||||
-- | -- | ||||
-- operates in bulk: 0. swh_mktemp(content_mimetype), 1. COPY to tmp_content_mimetype, | -- operates in bulk: 0. swh_mktemp(content_mimetype), 1. COPY to tmp_content_mimetype, | ||||
-- 2. call this function | -- 2. call this function | ||||
create or replace function swh_content_mimetype_add(conflict_update boolean) | create or replace function swh_content_mimetype_add() | ||||
returns bigint | returns bigint | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
declare | declare | ||||
res bigint; | res bigint; | ||||
begin | begin | ||||
if conflict_update then | |||||
insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) | insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) | ||||
select id, mimetype, encoding, indexer_configuration_id | select id, mimetype, encoding, indexer_configuration_id | ||||
from tmp_content_mimetype tcm | from tmp_content_mimetype tcm | ||||
on conflict(id, indexer_configuration_id) | on conflict(id, indexer_configuration_id) | ||||
do update set mimetype = excluded.mimetype, | do update set mimetype = excluded.mimetype, | ||||
encoding = excluded.encoding; | encoding = excluded.encoding; | ||||
else | |||||
insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) | |||||
select id, mimetype, encoding, indexer_configuration_id | |||||
from tmp_content_mimetype tcm | |||||
on conflict(id, indexer_configuration_id) | |||||
do nothing; | |||||
end if; | |||||
get diagnostics res = ROW_COUNT; | get diagnostics res = ROW_COUNT; | ||||
return res; | return res; | ||||
end | end | ||||
$$; | $$; | ||||
comment on function swh_content_mimetype_add(boolean) IS 'Add new content mimetypes'; | comment on function swh_content_mimetype_add() IS 'Add new content mimetypes'; | ||||
-- add tmp_content_language entries to content_language, overwriting | -- add tmp_content_language entries to content_language, overwriting duplicates. | ||||
-- duplicates if conflict_update is true, skipping duplicates otherwise. | |||||
-- | -- | ||||
-- If filtering duplicates is in order, the call to | -- If filtering duplicates is in order, the call to | ||||
-- swh_content_language_missing must take place before calling this | -- swh_content_language_missing must take place before calling this | ||||
-- function. | -- function. | ||||
-- | -- | ||||
-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | ||||
-- tmp_content_language, 2. call this function | -- tmp_content_language, 2. call this function | ||||
create or replace function swh_content_language_add(conflict_update boolean) | create or replace function swh_content_language_add() | ||||
returns bigint | returns bigint | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
declare | declare | ||||
res bigint; | res bigint; | ||||
begin | begin | ||||
if conflict_update then | |||||
insert into content_language (id, lang, indexer_configuration_id) | insert into content_language (id, lang, indexer_configuration_id) | ||||
select id, lang, indexer_configuration_id | select id, lang, indexer_configuration_id | ||||
from tmp_content_language tcl | from tmp_content_language tcl | ||||
on conflict(id, indexer_configuration_id) | on conflict(id, indexer_configuration_id) | ||||
do update set lang = excluded.lang; | do update set lang = excluded.lang; | ||||
else | |||||
insert into content_language (id, lang, indexer_configuration_id) | |||||
select id, lang, indexer_configuration_id | |||||
from tmp_content_language tcl | |||||
on conflict(id, indexer_configuration_id) | |||||
do nothing; | |||||
end if; | |||||
get diagnostics res = ROW_COUNT; | get diagnostics res = ROW_COUNT; | ||||
return res; | return res; | ||||
end | end | ||||
$$; | $$; | ||||
comment on function swh_content_language_add(boolean) IS 'Add new content languages'; | comment on function swh_content_language_add() IS 'Add new content languages'; | ||||
-- create a temporary table for retrieving content_language | -- create a temporary table for retrieving content_language | ||||
create or replace function swh_mktemp_content_language() | create or replace function swh_mktemp_content_language() | ||||
returns void | returns void | ||||
language sql | language sql | ||||
as $$ | as $$ | ||||
create temporary table if not exists tmp_content_language ( | create temporary table if not exists tmp_content_language ( | ||||
like content_language including defaults | like content_language including defaults | ||||
Show All 11 Lines | as $$ | ||||
create temporary table if not exists tmp_content_ctags ( | create temporary table if not exists tmp_content_ctags ( | ||||
like content_ctags including defaults | like content_ctags including defaults | ||||
) on commit delete rows; | ) on commit delete rows; | ||||
$$; | $$; | ||||
comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags'; | comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags'; | ||||
-- add tmp_content_ctags entries to content_ctags, overwriting | -- add tmp_content_ctags entries to content_ctags, overwriting duplicates | ||||
-- duplicates if conflict_update is true, skipping duplicates otherwise. | |||||
-- | -- | ||||
-- operates in bulk: 0. swh_mktemp(content_ctags), 1. COPY to tmp_content_ctags, | -- operates in bulk: 0. swh_mktemp(content_ctags), 1. COPY to tmp_content_ctags, | ||||
-- 2. call this function | -- 2. call this function | ||||
create or replace function swh_content_ctags_add(conflict_update boolean) | create or replace function swh_content_ctags_add() | ||||
returns bigint | returns bigint | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
declare | declare | ||||
res bigint; | res bigint; | ||||
begin | begin | ||||
if conflict_update then | |||||
delete from content_ctags | |||||
where id in (select tmp.id | |||||
from tmp_content_ctags tmp | |||||
inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); | |||||
end if; | |||||
insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id) | insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id) | ||||
select id, name, kind, line, lang, indexer_configuration_id | select id, name, kind, line, lang, indexer_configuration_id | ||||
from tmp_content_ctags tct | from tmp_content_ctags tct | ||||
on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id) | on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id) | ||||
do nothing; | do nothing; | ||||
get diagnostics res = ROW_COUNT; | get diagnostics res = ROW_COUNT; | ||||
return res; | return res; | ||||
end | end | ||||
$$; | $$; | ||||
comment on function swh_content_ctags_add(boolean) IS 'Add new ctags symbols per content'; | comment on function swh_content_ctags_add() IS 'Add new ctags symbols per content'; | ||||
create type content_ctags_signature as ( | create type content_ctags_signature as ( | ||||
id sha1, | id sha1, | ||||
name text, | name text, | ||||
kind text, | kind text, | ||||
line bigint, | line bigint, | ||||
lang ctags_languages, | lang ctags_languages, | ||||
tool_id integer, | tool_id integer, | ||||
Show All 33 Lines | create temporary table if not exists tmp_content_fossology_license ( | ||||
id sha1, | id sha1, | ||||
license text, | license text, | ||||
indexer_configuration_id integer | indexer_configuration_id integer | ||||
) on commit delete rows; | ) on commit delete rows; | ||||
$$; | $$; | ||||
comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license'; | comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license'; | ||||
-- add tmp_content_fossology_license entries to content_fossology_license, overwriting | -- add tmp_content_fossology_license entries to content_fossology_license, | ||||
-- duplicates if conflict_update is true, skipping duplicates otherwise. | -- overwriting duplicates. | ||||
-- | -- | ||||
-- operates in bulk: 0. swh_mktemp(content_fossology_license), 1. COPY to | -- operates in bulk: 0. swh_mktemp(content_fossology_license), 1. COPY to | ||||
-- tmp_content_fossology_license, 2. call this function | -- tmp_content_fossology_license, 2. call this function | ||||
create or replace function swh_content_fossology_license_add(conflict_update boolean) | create or replace function swh_content_fossology_license_add() | ||||
returns bigint | returns bigint | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
declare | declare | ||||
res bigint; | res bigint; | ||||
begin | begin | ||||
-- insert unknown licenses first | -- insert unknown licenses first | ||||
insert into fossology_license (name) | insert into fossology_license (name) | ||||
select distinct license from tmp_content_fossology_license tmp | select distinct license from tmp_content_fossology_license tmp | ||||
where not exists (select 1 from fossology_license where name=tmp.license) | where not exists (select 1 from fossology_license where name=tmp.license) | ||||
on conflict(name) do nothing; | on conflict(name) do nothing; | ||||
if conflict_update then | |||||
insert into content_fossology_license (id, license_id, indexer_configuration_id) | insert into content_fossology_license (id, license_id, indexer_configuration_id) | ||||
select tcl.id, | select tcl.id, | ||||
(select id from fossology_license where name = tcl.license) as license, | (select id from fossology_license where name = tcl.license) as license, | ||||
indexer_configuration_id | indexer_configuration_id | ||||
from tmp_content_fossology_license tcl | from tmp_content_fossology_license tcl | ||||
on conflict(id, license_id, indexer_configuration_id) | on conflict(id, license_id, indexer_configuration_id) | ||||
do update set license_id = excluded.license_id; | do update set license_id = excluded.license_id; | ||||
else | |||||
insert into content_fossology_license (id, license_id, indexer_configuration_id) | |||||
select tcl.id, | |||||
(select id from fossology_license where name = tcl.license) as license, | |||||
indexer_configuration_id | |||||
from tmp_content_fossology_license tcl | |||||
on conflict(id, license_id, indexer_configuration_id) | |||||
do nothing; | |||||
end if; | |||||
get diagnostics res = ROW_COUNT; | get diagnostics res = ROW_COUNT; | ||||
return res; | return res; | ||||
end | end | ||||
$$; | $$; | ||||
comment on function swh_content_fossology_license_add(boolean) IS 'Add new content licenses'; | comment on function swh_content_fossology_license_add() IS 'Add new content licenses'; | ||||
-- content_metadata functions | -- content_metadata functions | ||||
-- add tmp_content_metadata entries to content_metadata, overwriting | -- add tmp_content_metadata entries to content_metadata, overwriting duplicates | ||||
-- duplicates if conflict_update is true, skipping duplicates otherwise. | |||||
-- | -- | ||||
-- If filtering duplicates is in order, the call to | -- If filtering duplicates is in order, the call to | ||||
-- swh_content_metadata_missing must take place before calling this | -- swh_content_metadata_missing must take place before calling this | ||||
-- function. | -- function. | ||||
-- | -- | ||||
-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | ||||
-- tmp_content_metadata, 2. call this function | -- tmp_content_metadata, 2. call this function | ||||
create or replace function swh_content_metadata_add(conflict_update boolean) | create or replace function swh_content_metadata_add() | ||||
returns bigint | returns bigint | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
declare | declare | ||||
res bigint; | res bigint; | ||||
begin | begin | ||||
if conflict_update then | |||||
insert into content_metadata (id, metadata, indexer_configuration_id) | insert into content_metadata (id, metadata, indexer_configuration_id) | ||||
select id, metadata, indexer_configuration_id | select id, metadata, indexer_configuration_id | ||||
from tmp_content_metadata tcm | from tmp_content_metadata tcm | ||||
on conflict(id, indexer_configuration_id) | on conflict(id, indexer_configuration_id) | ||||
do update set metadata = excluded.metadata; | do update set metadata = excluded.metadata; | ||||
else | |||||
insert into content_metadata (id, metadata, indexer_configuration_id) | |||||
select id, metadata, indexer_configuration_id | |||||
from tmp_content_metadata tcm | |||||
on conflict(id, indexer_configuration_id) | |||||
do nothing; | |||||
end if; | |||||
get diagnostics res = ROW_COUNT; | get diagnostics res = ROW_COUNT; | ||||
return res; | return res; | ||||
end | end | ||||
$$; | $$; | ||||
comment on function swh_content_metadata_add(boolean) IS 'Add new content metadata'; | comment on function swh_content_metadata_add() IS 'Add new content metadata'; | ||||
-- create a temporary table for retrieving content_metadata | -- create a temporary table for retrieving content_metadata | ||||
create or replace function swh_mktemp_content_metadata() | create or replace function swh_mktemp_content_metadata() | ||||
returns void | returns void | ||||
language sql | language sql | ||||
as $$ | as $$ | ||||
create temporary table if not exists tmp_content_metadata ( | create temporary table if not exists tmp_content_metadata ( | ||||
like content_metadata including defaults | like content_metadata including defaults | ||||
) on commit delete rows; | ) on commit delete rows; | ||||
$$; | $$; | ||||
comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata'; | comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata'; | ||||
-- end content_metadata functions | -- end content_metadata functions | ||||
-- add tmp_revision_intrinsic_metadata entries to revision_intrinsic_metadata, | -- add tmp_revision_intrinsic_metadata entries to revision_intrinsic_metadata, | ||||
-- overwriting duplicates if conflict_update is true, skipping duplicates | -- overwriting duplicates. | ||||
-- otherwise. | |||||
-- | -- | ||||
-- If filtering duplicates is in order, the call to | -- If filtering duplicates is in order, the call to | ||||
-- swh_revision_intrinsic_metadata_missing must take place before calling this | -- swh_revision_intrinsic_metadata_missing must take place before calling this | ||||
-- function. | -- function. | ||||
-- | -- | ||||
-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | ||||
-- tmp_revision_intrinsic_metadata, 2. call this function | -- tmp_revision_intrinsic_metadata, 2. call this function | ||||
create or replace function swh_revision_intrinsic_metadata_add(conflict_update boolean) | create or replace function swh_revision_intrinsic_metadata_add() | ||||
returns bigint | returns bigint | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
declare | declare | ||||
res bigint; | res bigint; | ||||
begin | begin | ||||
if conflict_update then | |||||
insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id) | insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id) | ||||
select id, metadata, mappings, indexer_configuration_id | select id, metadata, mappings, indexer_configuration_id | ||||
from tmp_revision_intrinsic_metadata tcm | from tmp_revision_intrinsic_metadata tcm | ||||
on conflict(id, indexer_configuration_id) | on conflict(id, indexer_configuration_id) | ||||
do update set | do update set | ||||
metadata = excluded.metadata, | metadata = excluded.metadata, | ||||
mappings = excluded.mappings; | mappings = excluded.mappings; | ||||
else | |||||
insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id) | |||||
select id, metadata, mappings, indexer_configuration_id | |||||
from tmp_revision_intrinsic_metadata tcm | |||||
on conflict(id, indexer_configuration_id) | |||||
do nothing; | |||||
end if; | |||||
get diagnostics res = ROW_COUNT; | get diagnostics res = ROW_COUNT; | ||||
return res; | return res; | ||||
end | end | ||||
$$; | $$; | ||||
comment on function swh_revision_intrinsic_metadata_add(boolean) IS 'Add new revision intrinsic metadata'; | comment on function swh_revision_intrinsic_metadata_add() IS 'Add new revision intrinsic metadata'; | ||||
-- create a temporary table for retrieving revision_intrinsic_metadata | -- create a temporary table for retrieving revision_intrinsic_metadata | ||||
create or replace function swh_mktemp_revision_intrinsic_metadata() | create or replace function swh_mktemp_revision_intrinsic_metadata() | ||||
returns void | returns void | ||||
language sql | language sql | ||||
as $$ | as $$ | ||||
create temporary table if not exists tmp_revision_intrinsic_metadata ( | create temporary table if not exists tmp_revision_intrinsic_metadata ( | ||||
like revision_intrinsic_metadata including defaults | like revision_intrinsic_metadata including defaults | ||||
Show All 21 Lines | as $$ | ||||
create temporary table if not exists tmp_indexer_configuration ( | create temporary table if not exists tmp_indexer_configuration ( | ||||
like indexer_configuration including defaults | like indexer_configuration including defaults | ||||
) on commit delete rows; | ) on commit delete rows; | ||||
alter table tmp_indexer_configuration drop column if exists id; | alter table tmp_indexer_configuration drop column if exists id; | ||||
$$; | $$; | ||||
-- add tmp_indexer_configuration entries to indexer_configuration, | -- add tmp_indexer_configuration entries to indexer_configuration, | ||||
-- skipping duplicates if any. | -- overwriting duplicates if any. | ||||
-- | -- | ||||
-- operates in bulk: 0. create temporary tmp_indexer_configuration, 1. COPY to | -- operates in bulk: 0. create temporary tmp_indexer_configuration, 1. COPY to | ||||
-- it, 2. call this function to insert and filtering out duplicates | -- it, 2. call this function to insert and filtering out duplicates | ||||
create or replace function swh_indexer_configuration_add() | create or replace function swh_indexer_configuration_add() | ||||
returns setof indexer_configuration | returns setof indexer_configuration | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
begin | begin | ||||
insert into indexer_configuration(tool_name, tool_version, tool_configuration) | insert into indexer_configuration(tool_name, tool_version, tool_configuration) | ||||
select tool_name, tool_version, tool_configuration from tmp_indexer_configuration tmp | select tool_name, tool_version, tool_configuration from tmp_indexer_configuration tmp | ||||
on conflict(tool_name, tool_version, tool_configuration) do nothing; | on conflict(tool_name, tool_version, tool_configuration) do nothing; | ||||
return query | return query | ||||
select id, tool_name, tool_version, tool_configuration | select id, tool_name, tool_version, tool_configuration | ||||
from tmp_indexer_configuration join indexer_configuration | from tmp_indexer_configuration join indexer_configuration | ||||
using(tool_name, tool_version, tool_configuration); | using(tool_name, tool_version, tool_configuration); | ||||
return; | return; | ||||
end | end | ||||
$$; | $$; | ||||
-- add tmp_origin_intrinsic_metadata entries to origin_intrinsic_metadata, | -- add tmp_origin_intrinsic_metadata entries to origin_intrinsic_metadata, | ||||
-- overwriting duplicates if conflict_update is true, skipping duplicates | -- overwriting duplicates. | ||||
-- otherwise. | |||||
-- | -- | ||||
-- If filtering duplicates is in order, the call to | -- If filtering duplicates is in order, the call to | ||||
-- swh_origin_intrinsic_metadata_missing must take place before calling this | -- swh_origin_intrinsic_metadata_missing must take place before calling this | ||||
-- function. | -- function. | ||||
-- | -- | ||||
-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to | ||||
-- tmp_origin_intrinsic_metadata, 2. call this function | -- tmp_origin_intrinsic_metadata, 2. call this function | ||||
create or replace function swh_origin_intrinsic_metadata_add( | create or replace function swh_origin_intrinsic_metadata_add() | ||||
conflict_update boolean) | |||||
returns bigint | returns bigint | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
declare | declare | ||||
res bigint; | res bigint; | ||||
begin | begin | ||||
perform swh_origin_intrinsic_metadata_compute_tsvector(); | perform swh_origin_intrinsic_metadata_compute_tsvector(); | ||||
if conflict_update then | |||||
insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings) | insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings) | ||||
select id, metadata, indexer_configuration_id, from_revision, | select id, metadata, indexer_configuration_id, from_revision, | ||||
metadata_tsvector, mappings | metadata_tsvector, mappings | ||||
from tmp_origin_intrinsic_metadata | from tmp_origin_intrinsic_metadata | ||||
on conflict(id, indexer_configuration_id) | on conflict(id, indexer_configuration_id) | ||||
do update set | do update set | ||||
metadata = excluded.metadata, | metadata = excluded.metadata, | ||||
metadata_tsvector = excluded.metadata_tsvector, | metadata_tsvector = excluded.metadata_tsvector, | ||||
mappings = excluded.mappings, | mappings = excluded.mappings, | ||||
from_revision = excluded.from_revision; | from_revision = excluded.from_revision; | ||||
else | |||||
insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings) | |||||
select id, metadata, indexer_configuration_id, from_revision, | |||||
metadata_tsvector, mappings | |||||
from tmp_origin_intrinsic_metadata | |||||
on conflict(id, indexer_configuration_id) | |||||
do nothing; | |||||
end if; | |||||
get diagnostics res = ROW_COUNT; | get diagnostics res = ROW_COUNT; | ||||
return res; | return res; | ||||
end | end | ||||
$$; | $$; | ||||
comment on function swh_origin_intrinsic_metadata_add(boolean) IS 'Add new origin intrinsic metadata'; | comment on function swh_origin_intrinsic_metadata_add() IS 'Add new origin intrinsic metadata'; | ||||
-- Compute the metadata_tsvector column in tmp_origin_intrinsic_metadata. | -- Compute the metadata_tsvector column in tmp_origin_intrinsic_metadata. | ||||
-- | -- | ||||
-- It uses the "pg_catalog.simple" dictionary, as it has no stopword, | -- It uses the "pg_catalog.simple" dictionary, as it has no stopword, | ||||
-- so it should be suitable for proper names and non-English text. | -- so it should be suitable for proper names and non-English text. | ||||
create or replace function swh_origin_intrinsic_metadata_compute_tsvector() | create or replace function swh_origin_intrinsic_metadata_compute_tsvector() | ||||
returns void | returns void | ||||
language plpgsql | language plpgsql | ||||
as $$ | as $$ | ||||
begin | begin | ||||
update tmp_origin_intrinsic_metadata | update tmp_origin_intrinsic_metadata | ||||
set metadata_tsvector = to_tsvector('pg_catalog.simple', metadata); | set metadata_tsvector = to_tsvector('pg_catalog.simple', metadata); | ||||
end | end | ||||
$$; | $$; |