diff --git a/sql/upgrades/106.sql b/sql/upgrades/106.sql new file mode 100644 index 0000000..fb5a341 --- /dev/null +++ b/sql/upgrades/106.sql @@ -0,0 +1,392 @@ +-- SWH DB schema upgrade +-- from_version: 105 +-- to_version: 106 +-- description: Improve indexer endpoints function to use identifier + +insert into dbversion(version, release, description) + values(106, now(), 'Work In Progress'); + +drop type content_mimetype_signature cascade; +create type content_mimetype_signature as( + id sha1, + mimetype bytea, + encoding bytea, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +drop type content_language_signature cascade; +create type content_language_signature as ( + id sha1, + lang languages, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +drop type content_ctags_signature cascade; +create type content_ctags_signature as ( + id sha1, + name text, + kind text, + line bigint, + lang ctags_languages, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +drop type content_fossology_license_signature cascade; +create type content_fossology_license_signature as ( + id sha1, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb, + licenses text[] +); + +CREATE OR REPLACE FUNCTION swh_content_ctags_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + delete from content_ctags + where id in (select tmp.id + from tmp_content_ctags tmp + inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); + end if; + + insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id) + select id, name, kind, line, lang, indexer_configuration_id + from tmp_content_ctags tct + on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id) + do nothing; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_ctags_add(conflict_update boolean) IS 'Add new ctags symbols per content'; + +CREATE OR REPLACE FUNCTION swh_content_ctags_get() RETURNS SETOF content_ctags_signature + LANGUAGE plpgsql + AS $$ +begin + return query + select c.id, c.name, c.kind, c.line, c.lang, + i.id as tool_id, i.tool_name, i.tool_version, i.tool_configuration + from tmp_bytea t + inner join content_ctags c using(id) + inner join indexer_configuration i on i.id = c.indexer_configuration_id + order by line; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_ctags_get() IS 'List content ctags'; + +CREATE OR REPLACE FUNCTION swh_content_ctags_missing() RETURNS SETOF sha1 + LANGUAGE plpgsql + AS $$ +begin + return query + (select id::sha1 from tmp_content_ctags_missing as tmp + where not exists + (select 1 from content_ctags as c + where c.id = tmp.id and c.indexer_configuration_id=tmp.indexer_configuration_id + limit 1)); + return; +end +$$; + +COMMENT ON FUNCTION swh_content_ctags_missing() IS 'Filter missing content ctags'; + +CREATE OR REPLACE FUNCTION swh_content_ctags_search(expression text, l integer default 10, last_sha1 sha1 = '\x0000000000000000000000000000000000000000'::bytea) RETURNS SETOF content_ctags_signature + LANGUAGE sql + AS $$ + select c.id, name, kind, line, lang, + i.id as tool_id, tool_name, tool_version, tool_configuration + from content_ctags c + inner join indexer_configuration i on i.id = c.indexer_configuration_id + where hash_sha1(name) = hash_sha1(expression) + and c.id > last_sha1 + order by id + limit l; +$$; + +COMMENT ON FUNCTION swh_content_ctags_search(expression text, l integer, last_sha1 sha1) IS 'Equality search through ctags'' symbols'; + +CREATE OR REPLACE FUNCTION swh_content_fossology_license_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + -- delete from content_fossology_license c + -- using tmp_content_fossology_license tmp, indexer_configuration i + -- where c.id = tmp.id and i.id=tmp.indexer_configuration_id + delete from content_fossology_license + where id in (select tmp.id + from tmp_content_fossology_license tmp + inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); + end if; + + insert into content_fossology_license (id, license_id, indexer_configuration_id) + select tcl.id, + (select id from fossology_license where name = tcl.license) as license, + indexer_configuration_id + from tmp_content_fossology_license tcl + on conflict(id, license_id, indexer_configuration_id) + do nothing; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_fossology_license_add(conflict_update boolean) IS 'Add new content licenses'; + +CREATE OR REPLACE FUNCTION swh_content_fossology_license_get() RETURNS SETOF content_fossology_license_signature + LANGUAGE plpgsql + AS $$ +begin + return query + select cl.id, + ic.id as tool_id, + ic.tool_name, + ic.tool_version, + ic.tool_configuration, + array(select name + from fossology_license + where id = ANY(array_agg(cl.license_id))) as licenses + from tmp_bytea tcl + inner join content_fossology_license cl using(id) + inner join indexer_configuration ic on ic.id=cl.indexer_configuration_id + group by cl.id, ic.id, ic.tool_name, ic.tool_version, ic.tool_configuration; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_fossology_license_get() IS 'List content licenses'; + +CREATE OR REPLACE FUNCTION swh_content_fossology_license_missing() RETURNS SETOF sha1 + LANGUAGE plpgsql + AS $$ +begin + return query + (select id::sha1 from tmp_content_fossology_license_missing as tmp + where not exists + (select 1 from content_fossology_license as c + where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id)); + return; +end +$$; + +COMMENT ON FUNCTION swh_content_fossology_license_missing() IS 'Filter missing content licenses'; + +CREATE OR REPLACE FUNCTION swh_content_language_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + insert into content_language (id, lang, indexer_configuration_id) + select id, lang, indexer_configuration_id + from tmp_content_language tcl + on conflict(id, indexer_configuration_id) + do update set lang = excluded.lang; + + else + insert into content_language (id, lang, indexer_configuration_id) + select id, lang, indexer_configuration_id + from tmp_content_language tcl + on conflict(id, indexer_configuration_id) + do nothing; + end if; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_language_add(conflict_update boolean) IS 'Add new content languages'; + +CREATE OR REPLACE FUNCTION swh_content_language_get() RETURNS SETOF content_language_signature + LANGUAGE plpgsql + AS $$ +begin + return query + select c.id, lang, i.id as tool_id, tool_name, tool_version, tool_configuration + from tmp_bytea t + inner join content_language c on c.id = t.id + inner join indexer_configuration i on i.id=c.indexer_configuration_id; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_language_get() IS 'List content''s language'; + +CREATE OR REPLACE FUNCTION swh_content_language_missing() RETURNS SETOF sha1 + LANGUAGE plpgsql + AS $$ +begin + return query + select id::sha1 from tmp_content_language_missing as tmp + where not exists + (select 1 from content_language as c + where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id); + return; +end +$$; + +COMMENT ON FUNCTION swh_content_language_missing() IS 'Filter missing content languages'; + +CREATE OR REPLACE FUNCTION swh_content_mimetype_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) + select id, mimetype, encoding, indexer_configuration_id + from tmp_content_mimetype tcm + on conflict(id, indexer_configuration_id) + do update set mimetype = excluded.mimetype, + encoding = excluded.encoding; + + else + insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) + select id, mimetype, encoding, indexer_configuration_id + from tmp_content_mimetype tcm + on conflict(id, indexer_configuration_id) do nothing; + end if; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_mimetype_add(conflict_update boolean) IS 'Add new content mimetypes'; + +CREATE OR REPLACE FUNCTION swh_content_mimetype_get() RETURNS SETOF content_mimetype_signature + LANGUAGE plpgsql + AS $$ +begin + return query + select c.id, mimetype, encoding, + i.id as tool_id, tool_name, tool_version, tool_configuration + from tmp_bytea t + inner join content_mimetype c on c.id=t.id + inner join indexer_configuration i on c.indexer_configuration_id=i.id; + return; +end +$$; + +COMMENT ON FUNCTION swh_content_mimetype_get() IS 'List content''s mimetypes'; + +CREATE OR REPLACE FUNCTION swh_content_mimetype_missing() RETURNS SETOF sha1 + LANGUAGE plpgsql + AS $$ +begin + return query + (select id::sha1 from tmp_content_mimetype_missing as tmp + where not exists + (select 1 from content_mimetype as c + where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id)); + return; +end +$$; + +COMMENT ON FUNCTION swh_content_mimetype_missing() IS 'Filter existing mimetype information'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_ctags() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_ctags ( + like content_ctags including defaults + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_ctags() IS 'Helper table to add content ctags'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_ctags_missing() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_ctags_missing ( + id sha1, + indexer_configuration_id integer + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_ctags_missing() IS 'Helper table to filter missing content ctags'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_fossology_license() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_fossology_license ( + id sha1, + license text, + indexer_configuration_id integer + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_fossology_license() IS 'Helper table to add content license'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_fossology_license_missing() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_fossology_license_missing ( + id bytea, + indexer_configuration_id integer + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_fossology_license_missing() IS 'Helper table to add content license'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_language() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_language ( + like content_language including defaults + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_language() IS 'Helper table to add content language'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_language_missing() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_language_missing ( + id sha1, + indexer_configuration_id integer + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_language_missing() IS 'Helper table to filter missing language'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_mimetype() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_mimetype ( + like content_mimetype including defaults + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information'; + +CREATE OR REPLACE FUNCTION swh_mktemp_content_mimetype_missing() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_mimetype_missing ( + id sha1, + indexer_configuration_id bigint + ) on commit drop; +$$; + +COMMENT ON FUNCTION swh_mktemp_content_mimetype_missing() IS 'Helper table to filter existing mimetype information'; + +-- Update indexes + +DROP INDEX indexer_configuration_tool_name_tool_version_idx; + +CREATE UNIQUE INDEX indexer_configuration_tool_name_tool_version_tool_configura_idx ON indexer_configuration USING btree (tool_name, tool_version, tool_configuration); + +-- Update data on indexer configuration + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('pygments', '2.0.1+dfsg-1.1+deb8u1', '{"type": "library", "debian-package": "python3-pygments", "max_content_size": 10240}');