Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7343014
D8888.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Subscribers
None
D8888.id.diff
View Options
diff --git a/swh/indexer/sql/20-enums.sql b/swh/indexer/sql/20-enums.sql
--- a/swh/indexer/sql/20-enums.sql
+++ b/swh/indexer/sql/20-enums.sql
@@ -1,100 +0,0 @@
-create type languages as enum ( 'abap', 'abnf', 'actionscript',
- 'actionscript-3', 'ada', 'adl', 'agda', 'alloy', 'ambienttalk',
- 'antlr', 'antlr-with-actionscript-target', 'antlr-with-c#-target',
- 'antlr-with-cpp-target', 'antlr-with-java-target',
- 'antlr-with-objectivec-target', 'antlr-with-perl-target',
- 'antlr-with-python-target', 'antlr-with-ruby-target', 'apacheconf',
- 'apl', 'applescript', 'arduino', 'aspectj', 'aspx-cs', 'aspx-vb',
- 'asymptote', 'autohotkey', 'autoit', 'awk', 'base-makefile', 'bash',
- 'bash-session', 'batchfile', 'bbcode', 'bc', 'befunge',
- 'blitzbasic', 'blitzmax', 'bnf', 'boo', 'boogie', 'brainfuck',
- 'bro', 'bugs', 'c', 'c#', 'c++', 'c-objdump', 'ca65-assembler',
- 'cadl', 'camkes', 'cbm-basic-v2', 'ceylon', 'cfengine3',
- 'cfstatement', 'chaiscript', 'chapel', 'cheetah', 'cirru', 'clay',
- 'clojure', 'clojurescript', 'cmake', 'cobol', 'cobolfree',
- 'coffeescript', 'coldfusion-cfc', 'coldfusion-html', 'common-lisp',
- 'component-pascal', 'coq', 'cpp-objdump', 'cpsa', 'crmsh', 'croc',
- 'cryptol', 'csound-document', 'csound-orchestra', 'csound-score',
- 'css', 'css+django/jinja', 'css+genshi-text', 'css+lasso',
- 'css+mako', 'css+mozpreproc', 'css+myghty', 'css+php', 'css+ruby',
- 'css+smarty', 'cuda', 'cypher', 'cython', 'd', 'd-objdump',
- 'darcs-patch', 'dart', 'debian-control-file', 'debian-sourcelist',
- 'delphi', 'dg', 'diff', 'django/jinja', 'docker', 'dtd', 'duel',
- 'dylan', 'dylan-session', 'dylanlid', 'earl-grey', 'easytrieve',
- 'ebnf', 'ec', 'ecl', 'eiffel', 'elixir', 'elixir-iex-session',
- 'elm', 'emacslisp', 'embedded-ragel', 'erb', 'erlang',
- 'erlang-erl-session', 'evoque', 'ezhil', 'factor', 'fancy',
- 'fantom', 'felix', 'fish', 'fortran', 'fortranfixed', 'foxpro',
- 'fsharp', 'gap', 'gas', 'genshi', 'genshi-text', 'gettext-catalog',
- 'gherkin', 'glsl', 'gnuplot', 'go', 'golo', 'gooddata-cl', 'gosu',
- 'gosu-template', 'groff', 'groovy', 'haml', 'handlebars', 'haskell',
- 'haxe', 'hexdump', 'html', 'html+cheetah', 'html+django/jinja',
- 'html+evoque', 'html+genshi', 'html+handlebars', 'html+lasso',
- 'html+mako', 'html+myghty', 'html+php', 'html+smarty', 'html+twig',
- 'html+velocity', 'http', 'hxml', 'hy', 'hybris', 'idl', 'idris',
- 'igor', 'inform-6', 'inform-6-template', 'inform-7', 'ini', 'io',
- 'ioke', 'irc-logs', 'isabelle', 'j', 'jade', 'jags', 'jasmin',
- 'java', 'java-server-page', 'javascript', 'javascript+cheetah',
- 'javascript+django/jinja', 'javascript+genshi-text',
- 'javascript+lasso', 'javascript+mako', 'javascript+mozpreproc',
- 'javascript+myghty', 'javascript+php', 'javascript+ruby',
- 'javascript+smarty', 'jcl', 'json', 'json-ld', 'julia',
- 'julia-console', 'kal', 'kconfig', 'koka', 'kotlin', 'lasso',
- 'lean', 'lesscss', 'lighttpd-configuration-file', 'limbo', 'liquid',
- 'literate-agda', 'literate-cryptol', 'literate-haskell',
- 'literate-idris', 'livescript', 'llvm', 'logos', 'logtalk', 'lsl',
- 'lua', 'makefile', 'mako', 'maql', 'mask', 'mason', 'mathematica',
- 'matlab', 'matlab-session', 'minid', 'modelica', 'modula-2',
- 'moinmoin/trac-wiki-markup', 'monkey', 'moocode', 'moonscript',
- 'mozhashpreproc', 'mozpercentpreproc', 'mql', 'mscgen',
- 'msdos-session', 'mupad', 'mxml', 'myghty', 'mysql', 'nasm',
- 'nemerle', 'nesc', 'newlisp', 'newspeak',
- 'nginx-configuration-file', 'nimrod', 'nit', 'nix', 'nsis', 'numpy',
- 'objdump', 'objdump-nasm', 'objective-c', 'objective-c++',
- 'objective-j', 'ocaml', 'octave', 'odin', 'ooc', 'opa',
- 'openedge-abl', 'pacmanconf', 'pan', 'parasail', 'pawn', 'perl',
- 'perl6', 'php', 'pig', 'pike', 'pkgconfig', 'pl/pgsql',
- 'postgresql-console-(psql)', 'postgresql-sql-dialect', 'postscript',
- 'povray', 'powershell', 'powershell-session', 'praat', 'prolog',
- 'properties', 'protocol-buffer', 'puppet', 'pypy-log', 'python',
- 'python-3', 'python-3.0-traceback', 'python-console-session',
- 'python-traceback', 'qbasic', 'qml', 'qvto', 'racket', 'ragel',
- 'ragel-in-c-host', 'ragel-in-cpp-host', 'ragel-in-d-host',
- 'ragel-in-java-host', 'ragel-in-objective-c-host',
- 'ragel-in-ruby-host', 'raw-token-data', 'rconsole', 'rd', 'rebol',
- 'red', 'redcode', 'reg', 'resourcebundle', 'restructuredtext',
- 'rexx', 'rhtml', 'roboconf-graph', 'roboconf-instances',
- 'robotframework', 'rpmspec', 'rql', 'rsl', 'ruby',
- 'ruby-irb-session', 'rust', 's', 'sass', 'scala',
- 'scalate-server-page', 'scaml', 'scheme', 'scilab', 'scss', 'shen',
- 'slim', 'smali', 'smalltalk', 'smarty', 'snobol', 'sourcepawn',
- 'sparql', 'sql', 'sqlite3con', 'squidconf', 'stan', 'standard-ml',
- 'supercollider', 'swift', 'swig', 'systemverilog', 'tads-3', 'tap',
- 'tcl', 'tcsh', 'tcsh-session', 'tea', 'termcap', 'terminfo',
- 'terraform', 'tex', 'text-only', 'thrift', 'todotxt',
- 'trafficscript', 'treetop', 'turtle', 'twig', 'typescript',
- 'urbiscript', 'vala', 'vb.net', 'vctreestatus', 'velocity',
- 'verilog', 'vgl', 'vhdl', 'viml', 'x10', 'xml', 'xml+cheetah',
- 'xml+django/jinja', 'xml+evoque', 'xml+lasso', 'xml+mako',
- 'xml+myghty', 'xml+php', 'xml+ruby', 'xml+smarty', 'xml+velocity',
- 'xquery', 'xslt', 'xtend', 'xul+mozpreproc', 'yaml', 'yaml+jinja',
- 'zephir', 'unknown'
-);
-comment on type languages is 'Languages recognized by language indexer';
-
-create type ctags_languages as enum ( 'Ada', 'AnsiblePlaybook', 'Ant',
- 'Asm', 'Asp', 'Autoconf', 'Automake', 'Awk', 'Basic', 'BETA', 'C',
- 'C#', 'C++', 'Clojure', 'Cobol', 'CoffeeScript [disabled]', 'CSS',
- 'ctags', 'D', 'DBusIntrospect', 'Diff', 'DosBatch', 'DTS', 'Eiffel',
- 'Erlang', 'Falcon', 'Flex', 'Fortran', 'gdbinit [disabled]',
- 'Glade', 'Go', 'HTML', 'Iniconf', 'Java', 'JavaProperties',
- 'JavaScript', 'JSON', 'Lisp', 'Lua', 'M4', 'Make', 'man [disabled]',
- 'MatLab', 'Maven2', 'Myrddin', 'ObjectiveC', 'OCaml', 'OldC
- [disabled]', 'OldC++ [disabled]', 'Pascal', 'Perl', 'Perl6', 'PHP',
- 'PlistXML', 'pod', 'Protobuf', 'Python', 'PythonLoggingConfig', 'R',
- 'RelaxNG', 'reStructuredText', 'REXX', 'RpmSpec', 'Ruby', 'Rust',
- 'Scheme', 'Sh', 'SLang', 'SML', 'SQL', 'SVG', 'SystemdUnit',
- 'SystemVerilog', 'Tcl', 'Tex', 'TTCN', 'Vera', 'Verilog', 'VHDL',
- 'Vim', 'WindRes', 'XSLT', 'YACC', 'Yaml', 'YumRepo', 'Zephir'
-);
-comment on type ctags_languages is 'Languages recognized by ctags indexer';
diff --git a/swh/indexer/sql/30-schema.sql b/swh/indexer/sql/30-schema.sql
--- a/swh/indexer/sql/30-schema.sql
+++ b/swh/indexer/sql/30-schema.sql
@@ -36,35 +36,6 @@
comment on column content_mimetype.encoding is 'Raw content encoding';
comment on column content_mimetype.indexer_configuration_id is 'Tool used to compute the information';
--- Language metadata
-create table content_language (
- id sha1 not null,
- lang languages not null,
- indexer_configuration_id bigint not null
-);
-
-comment on table content_language is 'Language information on a raw content';
-comment on column content_language.lang is 'Language information';
-comment on column content_language.indexer_configuration_id is 'Tool used to compute the information';
-
--- ctags information per content
-create table content_ctags (
- id sha1 not null,
- name text not null,
- kind text not null,
- line bigint not null,
- lang ctags_languages not null,
- indexer_configuration_id bigint not null
-);
-
-comment on table content_ctags is 'Ctags information on a raw content';
-comment on column content_ctags.id is 'Content identifier';
-comment on column content_ctags.name is 'Symbol name';
-comment on column content_ctags.kind is 'Symbol kind (function, class, variable, const...)';
-comment on column content_ctags.line is 'Symbol line';
-comment on column content_ctags.lang is 'Language information for that content';
-comment on column content_ctags.indexer_configuration_id is 'Tool used to compute the information';
-
create table fossology_license(
id smallserial,
name text not null
diff --git a/swh/indexer/sql/50-func.sql b/swh/indexer/sql/50-func.sql
--- a/swh/indexer/sql/50-func.sql
+++ b/swh/indexer/sql/50-func.sql
@@ -70,120 +70,6 @@
comment on function swh_content_mimetype_add() IS 'Add new content mimetypes';
--- add tmp_content_language entries to content_language, overwriting duplicates.
---
--- If filtering duplicates is in order, the call to
--- swh_content_language_missing must take place before calling this
--- function.
---
--- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
--- tmp_content_language, 2. call this function
-create or replace function swh_content_language_add()
- returns bigint
- language plpgsql
-as $$
-declare
- res bigint;
-begin
- insert into content_language (id, lang, indexer_configuration_id)
- select id, lang, indexer_configuration_id
- from tmp_content_language tcl
- order by id, indexer_configuration_id
- on conflict(id, indexer_configuration_id)
- do update set lang = excluded.lang;
-
- get diagnostics res = ROW_COUNT;
- return res;
-end
-$$;
-
-comment on function swh_content_language_add() IS 'Add new content languages';
-
--- create a temporary table for retrieving content_language
-create or replace function swh_mktemp_content_language()
- returns void
- language sql
-as $$
- create temporary table if not exists tmp_content_language (
- like content_language including defaults
- ) on commit delete rows;
-$$;
-
-comment on function swh_mktemp_content_language() is 'Helper table to add content language';
-
-
--- create a temporary table for content_ctags tmp_content_ctags,
-create or replace function swh_mktemp_content_ctags()
- returns void
- language sql
-as $$
- create temporary table if not exists tmp_content_ctags (
- like content_ctags including defaults
- ) on commit delete rows;
-$$;
-
-comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags';
-
-
--- add tmp_content_ctags entries to content_ctags, overwriting duplicates
---
--- operates in bulk: 0. swh_mktemp(content_ctags), 1. COPY to tmp_content_ctags,
--- 2. call this function
-create or replace function swh_content_ctags_add()
- returns bigint
- language plpgsql
-as $$
-declare
- res bigint;
-begin
- insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id)
- select id, name, kind, line, lang, indexer_configuration_id
- from tmp_content_ctags tct
- order by id, hash_sha1(name), kind, line, lang, indexer_configuration_id
- on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id)
- do nothing;
-
- get diagnostics res = ROW_COUNT;
- return res;
-end
-$$;
-
-comment on function swh_content_ctags_add() IS 'Add new ctags symbols per content';
-
-create type content_ctags_signature as (
- id sha1,
- name text,
- kind text,
- line bigint,
- lang ctags_languages,
- tool_id integer,
- tool_name text,
- tool_version text,
- tool_configuration jsonb
-);
-
--- Search within ctags content.
---
-create or replace function swh_content_ctags_search(
- expression text,
- l integer default 10,
- last_sha1 sha1 default '\x0000000000000000000000000000000000000000')
- returns setof content_ctags_signature
- language sql
-as $$
- select c.id, name, kind, line, lang,
- i.id as tool_id, tool_name, tool_version, tool_configuration
- from content_ctags c
- inner join indexer_configuration i on i.id = c.indexer_configuration_id
- where hash_sha1(name) = hash_sha1(expression)
- and c.id > last_sha1
- order by id
- limit l;
-$$;
-
-comment on function swh_content_ctags_search(text, integer, sha1) IS 'Equality search through ctags'' symbols';
-
-
-- create a temporary table for content_fossology_license tmp_content_fossology_license,
create or replace function swh_mktemp_content_fossology_license()
returns void
@@ -241,7 +127,7 @@
-- swh_content_metadata_missing must take place before calling this
-- function.
--
--- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
+-- operates in bulk: 0. swh_mktemp(content_metadata), 1. COPY to
-- tmp_content_metadata, 2. call this function
create or replace function swh_content_metadata_add()
returns bigint
@@ -285,7 +171,7 @@
-- swh_directory_intrinsic_metadata_missing must take place before calling this
-- function.
--
--- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
+-- operates in bulk: 0. swh_mktemp(directory_intrinsic_metadata), 1. COPY to
-- tmp_directory_intrinsic_metadata, 2. call this function
create or replace function swh_directory_intrinsic_metadata_add()
returns bigint
@@ -351,7 +237,7 @@
-- swh_origin_intrinsic_metadata_missing must take place before calling this
-- function.
--
--- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
+-- operates in bulk: 0. swh_mktemp(origin_intrinsic_metadata), 1. COPY to
-- tmp_origin_intrinsic_metadata, 2. call this function
create or replace function swh_origin_intrinsic_metadata_add()
returns bigint
@@ -425,7 +311,7 @@
-- swh_origin_extrinsic_metadata_missing must take place before calling this
-- function.
--
--- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
+-- operates in bulk: 0. swh_mktemp(origin_extrinsic_metadata), 1. COPY to
-- tmp_origin_extrinsic_metadata, 2. call this function
create or replace function swh_origin_extrinsic_metadata_add()
returns bigint
diff --git a/swh/indexer/sql/60-indexes.sql b/swh/indexer/sql/60-indexes.sql
--- a/swh/indexer/sql/60-indexes.sql
+++ b/swh/indexer/sql/60-indexes.sql
@@ -10,14 +10,6 @@
create unique index on indexer_configuration(tool_name, tool_version, tool_configuration);
--- content_ctags
-create index on content_ctags(id);
-create index on content_ctags(hash_sha1(name));
-create unique index on content_ctags(id, hash_sha1(name), kind, line, lang, indexer_configuration_id);
-
-alter table content_ctags add constraint content_ctags_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
-alter table content_ctags validate constraint content_ctags_indexer_configuration_id_fkey;
-
-- content_metadata
create unique index content_metadata_pkey on content_metadata(id, indexer_configuration_id);
alter table content_metadata add primary key using index content_metadata_pkey;
@@ -41,13 +33,6 @@
create index on content_mimetype(id) where mimetype like 'text/%';
--- content_language
-create unique index content_language_pkey on content_language(id, indexer_configuration_id);
-alter table content_language add primary key using index content_language_pkey;
-
-alter table content_language add constraint content_language_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
-alter table content_language validate constraint content_language_indexer_configuration_id_fkey;
-
-- content_fossology_license
create unique index content_fossology_license_pkey on content_fossology_license(id, license_id, indexer_configuration_id);
alter table content_fossology_license add primary key using index content_fossology_license_pkey;
diff --git a/swh/indexer/sql/upgrades/137.sql b/swh/indexer/sql/upgrades/137.sql
new file mode 100644
--- /dev/null
+++ b/swh/indexer/sql/upgrades/137.sql
@@ -0,0 +1,23 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 136
+-- to_version: 137
+-- description: Drop content_language and content_ctags tables and related functions
+
+insert into dbversion(version, release, description)
+ values(137, now(), 'Work In Progress');
+
+drop function swh_content_language_add;
+drop function swh_mktemp_content_language();
+drop function swh_mktemp_content_ctags();
+drop function swh_content_ctags_add();
+drop function swh_content_ctags_search;
+
+-- no explicit "drop index": pkey-backed indexes go away with their tables
+
+drop table content_language;
+drop table content_ctags;
+
+-- content_ctags_signature has a ctags_languages attribute: drop it first
+drop type content_ctags_signature;
+drop type languages;
+drop type ctags_languages;
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -137,7 +137,7 @@
class IndexerStorage:
"""SWH Indexer Storage Datastore"""
- current_version = 136
+ current_version = 137
def __init__(self, db, min_pool_conns=1, max_pool_conns=10, journal_writer=None):
"""
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 6:44 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224833
Attached To
D8888: Drop content_language and content_ctags tables and related SQL functions
Event Timeline
Log In to Comment