diff --git a/MANIFEST.in b/MANIFEST.in index e7c46fc..3ddecee 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,6 @@ +include README.md include Makefile include requirements.txt include requirements-swh.txt include version.txt +recursive-include swh/indexer/tests/data *.sql diff --git a/swh/indexer/tests/__init__.py b/swh/indexer/tests/__init__.py index e69de29..dc81528 100644 --- a/swh/indexer/tests/__init__.py +++ b/swh/indexer/tests/__init__.py @@ -0,0 +1,2 @@ +from os import path +DATA_DIR = path.join(path.dirname(__file__), 'data') diff --git a/swh/indexer/tests/data/dumps/swh-indexer.sql b/swh/indexer/tests/data/dumps/swh-indexer.sql new file mode 100644 index 0000000..98d59c1 --- /dev/null +++ b/swh/indexer/tests/data/dumps/swh-indexer.sql @@ -0,0 +1,1801 @@ +-- +-- PostgreSQL database dump +-- + +-- Dumped from database version 10.4 (Debian 10.4-2) +-- Dumped by pg_dump version 10.4 (Debian 10.4-2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET client_min_messages = warning; +SET row_security = off; + +-- +-- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; + + +-- +-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: - +-- + +COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; + + +-- +-- Name: btree_gist; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS btree_gist WITH SCHEMA public; + + +-- +-- Name: EXTENSION btree_gist; Type: COMMENT; Schema: -; Owner: - +-- + +COMMENT ON EXTENSION btree_gist IS 'support for indexing common datatypes in GiST'; + + +-- +-- Name: pgcrypto; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA public; + + +-- +-- Name: EXTENSION pgcrypto; Type: 
COMMENT; Schema: -; Owner: - +-- + +COMMENT ON EXTENSION pgcrypto IS 'cryptographic functions'; + + +-- +-- Name: ctags_languages; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.ctags_languages AS ENUM ( + 'Ada', + 'AnsiblePlaybook', + 'Ant', + 'Asm', + 'Asp', + 'Autoconf', + 'Automake', + 'Awk', + 'Basic', + 'BETA', + 'C', + 'C#', + 'C++', + 'Clojure', + 'Cobol', + 'CoffeeScript [disabled]', + 'CSS', + 'ctags', + 'D', + 'DBusIntrospect', + 'Diff', + 'DosBatch', + 'DTS', + 'Eiffel', + 'Erlang', + 'Falcon', + 'Flex', + 'Fortran', + 'gdbinit [disabled]', + 'Glade', + 'Go', + 'HTML', + 'Iniconf', + 'Java', + 'JavaProperties', + 'JavaScript', + 'JSON', + 'Lisp', + 'Lua', + 'M4', + 'Make', + 'man [disabled]', + 'MatLab', + 'Maven2', + 'Myrddin', + 'ObjectiveC', + 'OCaml', + 'OldC + [disabled]', + 'OldC++ [disabled]', + 'Pascal', + 'Perl', + 'Perl6', + 'PHP', + 'PlistXML', + 'pod', + 'Protobuf', + 'Python', + 'PythonLoggingConfig', + 'R', + 'RelaxNG', + 'reStructuredText', + 'REXX', + 'RpmSpec', + 'Ruby', + 'Rust', + 'Scheme', + 'Sh', + 'SLang', + 'SML', + 'SQL', + 'SVG', + 'SystemdUnit', + 'SystemVerilog', + 'Tcl', + 'Tex', + 'TTCN', + 'Vera', + 'Verilog', + 'VHDL', + 'Vim', + 'WindRes', + 'XSLT', + 'YACC', + 'Yaml', + 'YumRepo', + 'Zephir' +); + + +-- +-- Name: TYPE ctags_languages; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TYPE public.ctags_languages IS 'Languages recognized by ctags indexer'; + + +-- +-- Name: sha1; Type: DOMAIN; Schema: public; Owner: - +-- + +CREATE DOMAIN public.sha1 AS bytea + CONSTRAINT sha1_check CHECK ((length(VALUE) = 20)); + + +-- +-- Name: content_ctags_signature; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.content_ctags_signature AS ( + id public.sha1, + name text, + kind text, + line bigint, + lang public.ctags_languages, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + + +-- +-- Name: languages; Type: TYPE; Schema: public; Owner: - +-- + +CREATE 
TYPE public.languages AS ENUM ( + 'abap', + 'abnf', + 'actionscript', + 'actionscript-3', + 'ada', + 'adl', + 'agda', + 'alloy', + 'ambienttalk', + 'antlr', + 'antlr-with-actionscript-target', + 'antlr-with-c#-target', + 'antlr-with-cpp-target', + 'antlr-with-java-target', + 'antlr-with-objectivec-target', + 'antlr-with-perl-target', + 'antlr-with-python-target', + 'antlr-with-ruby-target', + 'apacheconf', + 'apl', + 'applescript', + 'arduino', + 'aspectj', + 'aspx-cs', + 'aspx-vb', + 'asymptote', + 'autohotkey', + 'autoit', + 'awk', + 'base-makefile', + 'bash', + 'bash-session', + 'batchfile', + 'bbcode', + 'bc', + 'befunge', + 'blitzbasic', + 'blitzmax', + 'bnf', + 'boo', + 'boogie', + 'brainfuck', + 'bro', + 'bugs', + 'c', + 'c#', + 'c++', + 'c-objdump', + 'ca65-assembler', + 'cadl', + 'camkes', + 'cbm-basic-v2', + 'ceylon', + 'cfengine3', + 'cfstatement', + 'chaiscript', + 'chapel', + 'cheetah', + 'cirru', + 'clay', + 'clojure', + 'clojurescript', + 'cmake', + 'cobol', + 'cobolfree', + 'coffeescript', + 'coldfusion-cfc', + 'coldfusion-html', + 'common-lisp', + 'component-pascal', + 'coq', + 'cpp-objdump', + 'cpsa', + 'crmsh', + 'croc', + 'cryptol', + 'csound-document', + 'csound-orchestra', + 'csound-score', + 'css', + 'css+django/jinja', + 'css+genshi-text', + 'css+lasso', + 'css+mako', + 'css+mozpreproc', + 'css+myghty', + 'css+php', + 'css+ruby', + 'css+smarty', + 'cuda', + 'cypher', + 'cython', + 'd', + 'd-objdump', + 'darcs-patch', + 'dart', + 'debian-control-file', + 'debian-sourcelist', + 'delphi', + 'dg', + 'diff', + 'django/jinja', + 'docker', + 'dtd', + 'duel', + 'dylan', + 'dylan-session', + 'dylanlid', + 'earl-grey', + 'easytrieve', + 'ebnf', + 'ec', + 'ecl', + 'eiffel', + 'elixir', + 'elixir-iex-session', + 'elm', + 'emacslisp', + 'embedded-ragel', + 'erb', + 'erlang', + 'erlang-erl-session', + 'evoque', + 'ezhil', + 'factor', + 'fancy', + 'fantom', + 'felix', + 'fish', + 'fortran', + 'fortranfixed', + 'foxpro', + 'fsharp', + 'gap', + 'gas', + 
'genshi', + 'genshi-text', + 'gettext-catalog', + 'gherkin', + 'glsl', + 'gnuplot', + 'go', + 'golo', + 'gooddata-cl', + 'gosu', + 'gosu-template', + 'groff', + 'groovy', + 'haml', + 'handlebars', + 'haskell', + 'haxe', + 'hexdump', + 'html', + 'html+cheetah', + 'html+django/jinja', + 'html+evoque', + 'html+genshi', + 'html+handlebars', + 'html+lasso', + 'html+mako', + 'html+myghty', + 'html+php', + 'html+smarty', + 'html+twig', + 'html+velocity', + 'http', + 'hxml', + 'hy', + 'hybris', + 'idl', + 'idris', + 'igor', + 'inform-6', + 'inform-6-template', + 'inform-7', + 'ini', + 'io', + 'ioke', + 'irc-logs', + 'isabelle', + 'j', + 'jade', + 'jags', + 'jasmin', + 'java', + 'java-server-page', + 'javascript', + 'javascript+cheetah', + 'javascript+django/jinja', + 'javascript+genshi-text', + 'javascript+lasso', + 'javascript+mako', + 'javascript+mozpreproc', + 'javascript+myghty', + 'javascript+php', + 'javascript+ruby', + 'javascript+smarty', + 'jcl', + 'json', + 'json-ld', + 'julia', + 'julia-console', + 'kal', + 'kconfig', + 'koka', + 'kotlin', + 'lasso', + 'lean', + 'lesscss', + 'lighttpd-configuration-file', + 'limbo', + 'liquid', + 'literate-agda', + 'literate-cryptol', + 'literate-haskell', + 'literate-idris', + 'livescript', + 'llvm', + 'logos', + 'logtalk', + 'lsl', + 'lua', + 'makefile', + 'mako', + 'maql', + 'mask', + 'mason', + 'mathematica', + 'matlab', + 'matlab-session', + 'minid', + 'modelica', + 'modula-2', + 'moinmoin/trac-wiki-markup', + 'monkey', + 'moocode', + 'moonscript', + 'mozhashpreproc', + 'mozpercentpreproc', + 'mql', + 'mscgen', + 'msdos-session', + 'mupad', + 'mxml', + 'myghty', + 'mysql', + 'nasm', + 'nemerle', + 'nesc', + 'newlisp', + 'newspeak', + 'nginx-configuration-file', + 'nimrod', + 'nit', + 'nix', + 'nsis', + 'numpy', + 'objdump', + 'objdump-nasm', + 'objective-c', + 'objective-c++', + 'objective-j', + 'ocaml', + 'octave', + 'odin', + 'ooc', + 'opa', + 'openedge-abl', + 'pacmanconf', + 'pan', + 'parasail', + 'pawn', + 'perl', + 
'perl6', + 'php', + 'pig', + 'pike', + 'pkgconfig', + 'pl/pgsql', + 'postgresql-console-(psql)', + 'postgresql-sql-dialect', + 'postscript', + 'povray', + 'powershell', + 'powershell-session', + 'praat', + 'prolog', + 'properties', + 'protocol-buffer', + 'puppet', + 'pypy-log', + 'python', + 'python-3', + 'python-3.0-traceback', + 'python-console-session', + 'python-traceback', + 'qbasic', + 'qml', + 'qvto', + 'racket', + 'ragel', + 'ragel-in-c-host', + 'ragel-in-cpp-host', + 'ragel-in-d-host', + 'ragel-in-java-host', + 'ragel-in-objective-c-host', + 'ragel-in-ruby-host', + 'raw-token-data', + 'rconsole', + 'rd', + 'rebol', + 'red', + 'redcode', + 'reg', + 'resourcebundle', + 'restructuredtext', + 'rexx', + 'rhtml', + 'roboconf-graph', + 'roboconf-instances', + 'robotframework', + 'rpmspec', + 'rql', + 'rsl', + 'ruby', + 'ruby-irb-session', + 'rust', + 's', + 'sass', + 'scala', + 'scalate-server-page', + 'scaml', + 'scheme', + 'scilab', + 'scss', + 'shen', + 'slim', + 'smali', + 'smalltalk', + 'smarty', + 'snobol', + 'sourcepawn', + 'sparql', + 'sql', + 'sqlite3con', + 'squidconf', + 'stan', + 'standard-ml', + 'supercollider', + 'swift', + 'swig', + 'systemverilog', + 'tads-3', + 'tap', + 'tcl', + 'tcsh', + 'tcsh-session', + 'tea', + 'termcap', + 'terminfo', + 'terraform', + 'tex', + 'text-only', + 'thrift', + 'todotxt', + 'trafficscript', + 'treetop', + 'turtle', + 'twig', + 'typescript', + 'urbiscript', + 'vala', + 'vb.net', + 'vctreestatus', + 'velocity', + 'verilog', + 'vgl', + 'vhdl', + 'viml', + 'x10', + 'xml', + 'xml+cheetah', + 'xml+django/jinja', + 'xml+evoque', + 'xml+lasso', + 'xml+mako', + 'xml+myghty', + 'xml+php', + 'xml+ruby', + 'xml+smarty', + 'xml+velocity', + 'xquery', + 'xslt', + 'xtend', + 'xul+mozpreproc', + 'yaml', + 'yaml+jinja', + 'zephir', + 'unknown' +); + + +-- +-- Name: TYPE languages; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TYPE public.languages IS 'Languages recognized by language indexer'; + + +-- +-- Name: sha1_git; 
Type: DOMAIN; Schema: public; Owner: - +-- + +CREATE DOMAIN public.sha1_git AS bytea + CONSTRAINT sha1_git_check CHECK ((length(VALUE) = 20)); + + +-- +-- Name: hash_sha1(text); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.hash_sha1(text) RETURNS text + LANGUAGE sql IMMUTABLE STRICT + AS $_$ + select encode(public.digest($1, 'sha1'), 'hex') +$_$; + + +-- +-- Name: FUNCTION hash_sha1(text); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.hash_sha1(text) IS 'Compute sha1 hash as text'; + + +-- +-- Name: swh_content_ctags_add(boolean); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_content_ctags_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + delete from content_ctags + where id in (select tmp.id + from tmp_content_ctags tmp + inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); + end if; + + insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id) + select id, name, kind, line, lang, indexer_configuration_id + from tmp_content_ctags tct + on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id) + do nothing; + return; +end +$$; + + +-- +-- Name: FUNCTION swh_content_ctags_add(conflict_update boolean); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_content_ctags_add(conflict_update boolean) IS 'Add new ctags symbols per content'; + + +-- +-- Name: swh_content_ctags_search(text, integer, public.sha1); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_content_ctags_search(expression text, l integer DEFAULT 10, last_sha1 public.sha1 DEFAULT '\x0000000000000000000000000000000000000000'::bytea) RETURNS SETOF public.content_ctags_signature + LANGUAGE sql + AS $$ + select c.id, name, kind, line, lang, + i.id as tool_id, tool_name, tool_version, tool_configuration + from content_ctags c + inner join indexer_configuration i 
on i.id = c.indexer_configuration_id + where hash_sha1(name) = hash_sha1(expression) + and c.id > last_sha1 + order by id + limit l; +$$; + + +-- +-- Name: FUNCTION swh_content_ctags_search(expression text, l integer, last_sha1 public.sha1); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_content_ctags_search(expression text, l integer, last_sha1 public.sha1) IS 'Equality search through ctags'' symbols'; + + +-- +-- Name: swh_content_fossology_license_add(boolean); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_content_fossology_license_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + -- insert unknown licenses first + insert into fossology_license (name) + select distinct license from tmp_content_fossology_license tmp + where not exists (select 1 from fossology_license where name=tmp.license) + on conflict(name) do nothing; + + if conflict_update then + -- delete from content_fossology_license c + -- using tmp_content_fossology_license tmp, indexer_configuration i + -- where c.id = tmp.id and i.id=tmp.indexer_configuration_id + delete from content_fossology_license + where id in (select tmp.id + from tmp_content_fossology_license tmp + inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); + end if; + + insert into content_fossology_license (id, license_id, indexer_configuration_id) + select tcl.id, + (select id from fossology_license where name = tcl.license) as license, + indexer_configuration_id + from tmp_content_fossology_license tcl + on conflict(id, license_id, indexer_configuration_id) + do nothing; + return; +end +$$; + + +-- +-- Name: FUNCTION swh_content_fossology_license_add(conflict_update boolean); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_content_fossology_license_add(conflict_update boolean) IS 'Add new content licenses'; + + +-- +-- Name: swh_content_language_add(boolean); Type: FUNCTION; Schema: public; 
Owner: - +-- + +CREATE FUNCTION public.swh_content_language_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + insert into content_language (id, lang, indexer_configuration_id) + select id, lang, indexer_configuration_id + from tmp_content_language tcl + on conflict(id, indexer_configuration_id) + do update set lang = excluded.lang; + + else + insert into content_language (id, lang, indexer_configuration_id) + select id, lang, indexer_configuration_id + from tmp_content_language tcl + on conflict(id, indexer_configuration_id) + do nothing; + end if; + return; +end +$$; + + +-- +-- Name: FUNCTION swh_content_language_add(conflict_update boolean); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_content_language_add(conflict_update boolean) IS 'Add new content languages'; + + +-- +-- Name: swh_content_metadata_add(boolean); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_content_metadata_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + insert into content_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_content_metadata tcm + on conflict(id, indexer_configuration_id) + do update set translated_metadata = excluded.translated_metadata; + + else + insert into content_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_content_metadata tcm + on conflict(id, indexer_configuration_id) + do nothing; + end if; + return; +end +$$; + + +-- +-- Name: FUNCTION swh_content_metadata_add(conflict_update boolean); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_content_metadata_add(conflict_update boolean) IS 'Add new content metadata'; + + +-- +-- Name: swh_content_mimetype_add(boolean); Type: FUNCTION; Schema: public; Owner: - +-- + 
+CREATE FUNCTION public.swh_content_mimetype_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) + select id, mimetype, encoding, indexer_configuration_id + from tmp_content_mimetype tcm + on conflict(id, indexer_configuration_id) + do update set mimetype = excluded.mimetype, + encoding = excluded.encoding; + + else + insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) + select id, mimetype, encoding, indexer_configuration_id + from tmp_content_mimetype tcm + on conflict(id, indexer_configuration_id) do nothing; + end if; + return; +end +$$; + + +-- +-- Name: FUNCTION swh_content_mimetype_add(conflict_update boolean); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_content_mimetype_add(conflict_update boolean) IS 'Add new content mimetypes'; + + +SET default_tablespace = ''; + +SET default_with_oids = false; + +-- +-- Name: indexer_configuration; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.indexer_configuration ( + id integer NOT NULL, + tool_name text NOT NULL, + tool_version text NOT NULL, + tool_configuration jsonb +); + + +-- +-- Name: TABLE indexer_configuration; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TABLE public.indexer_configuration IS 'Indexer''s configuration version'; + + +-- +-- Name: COLUMN indexer_configuration.id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.indexer_configuration.id IS 'Tool identifier'; + + +-- +-- Name: COLUMN indexer_configuration.tool_version; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.indexer_configuration.tool_version IS 'Tool version'; + + +-- +-- Name: COLUMN indexer_configuration.tool_configuration; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.indexer_configuration.tool_configuration IS 'Tool configuration: command 
line, flags, etc...'; + + +-- +-- Name: swh_indexer_configuration_add(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_indexer_configuration_add() RETURNS SETOF public.indexer_configuration + LANGUAGE plpgsql + AS $$ +begin + insert into indexer_configuration(tool_name, tool_version, tool_configuration) + select tool_name, tool_version, tool_configuration from tmp_indexer_configuration tmp + on conflict(tool_name, tool_version, tool_configuration) do nothing; + + return query + select id, tool_name, tool_version, tool_configuration + from tmp_indexer_configuration join indexer_configuration + using(tool_name, tool_version, tool_configuration); + + return; +end +$$; + + +-- +-- Name: swh_mktemp(regclass); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp(tblname regclass) RETURNS void + LANGUAGE plpgsql + AS $_$ +begin + execute format(' + create temporary table tmp_%1$I + (like %1$I including defaults) + on commit drop; + alter table tmp_%1$I drop column if exists object_id; + ', tblname); + return; +end +$_$; + + +-- +-- Name: swh_mktemp_content_ctags(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp_content_ctags() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_ctags ( + like content_ctags including defaults + ) on commit drop; +$$; + + +-- +-- Name: FUNCTION swh_mktemp_content_ctags(); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_mktemp_content_ctags() IS 'Helper table to add content ctags'; + + +-- +-- Name: swh_mktemp_content_fossology_license(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp_content_fossology_license() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_fossology_license ( + id sha1, + license text, + indexer_configuration_id integer + ) on commit drop; +$$; + + +-- +-- Name: FUNCTION swh_mktemp_content_fossology_license(); Type: 
COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_mktemp_content_fossology_license() IS 'Helper table to add content license'; + + +-- +-- Name: swh_mktemp_content_language(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp_content_language() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_language ( + like content_language including defaults + ) on commit drop; +$$; + + +-- +-- Name: FUNCTION swh_mktemp_content_language(); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_mktemp_content_language() IS 'Helper table to add content language'; + + +-- +-- Name: swh_mktemp_content_metadata(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp_content_metadata() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_metadata ( + like content_metadata including defaults + ) on commit drop; +$$; + + +-- +-- Name: FUNCTION swh_mktemp_content_metadata(); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_mktemp_content_metadata() IS 'Helper table to add content metadata'; + + +-- +-- Name: swh_mktemp_content_mimetype(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp_content_mimetype() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_content_mimetype ( + like content_mimetype including defaults + ) on commit drop; +$$; + + +-- +-- Name: FUNCTION swh_mktemp_content_mimetype(); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information'; + + +-- +-- Name: swh_mktemp_indexer_configuration(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp_indexer_configuration() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_indexer_configuration ( + like indexer_configuration including defaults + ) on commit drop; + alter 
table tmp_indexer_configuration drop column id; +$$; + + +-- +-- Name: swh_mktemp_revision_metadata(); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_mktemp_revision_metadata() RETURNS void + LANGUAGE sql + AS $$ + create temporary table tmp_revision_metadata ( + like revision_metadata including defaults + ) on commit drop; +$$; + + +-- +-- Name: FUNCTION swh_mktemp_revision_metadata(); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_mktemp_revision_metadata() IS 'Helper table to add revision metadata'; + + +-- +-- Name: swh_revision_metadata_add(boolean); Type: FUNCTION; Schema: public; Owner: - +-- + +CREATE FUNCTION public.swh_revision_metadata_add(conflict_update boolean) RETURNS void + LANGUAGE plpgsql + AS $$ +begin + if conflict_update then + insert into revision_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_revision_metadata tcm + on conflict(id, indexer_configuration_id) + do update set translated_metadata = excluded.translated_metadata; + + else + insert into revision_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_revision_metadata tcm + on conflict(id, indexer_configuration_id) + do nothing; + end if; + return; +end +$$; + + +-- +-- Name: FUNCTION swh_revision_metadata_add(conflict_update boolean); Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON FUNCTION public.swh_revision_metadata_add(conflict_update boolean) IS 'Add new revision metadata'; + + +-- +-- Name: content_ctags; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.content_ctags ( + id public.sha1 NOT NULL, + name text NOT NULL, + kind text NOT NULL, + line bigint NOT NULL, + lang public.ctags_languages NOT NULL, + indexer_configuration_id bigint NOT NULL +); + + +-- +-- Name: TABLE content_ctags; Type: COMMENT; Schema: public; Owner: - +-- + 
+COMMENT ON TABLE public.content_ctags IS 'Ctags information on a raw content'; + + +-- +-- Name: COLUMN content_ctags.id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_ctags.id IS 'Content identifier'; + + +-- +-- Name: COLUMN content_ctags.name; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_ctags.name IS 'Symbol name'; + + +-- +-- Name: COLUMN content_ctags.kind; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_ctags.kind IS 'Symbol kind (function, class, variable, const...)'; + + +-- +-- Name: COLUMN content_ctags.line; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_ctags.line IS 'Symbol line'; + + +-- +-- Name: COLUMN content_ctags.lang; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_ctags.lang IS 'Language information for that content'; + + +-- +-- Name: COLUMN content_ctags.indexer_configuration_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_ctags.indexer_configuration_id IS 'Tool used to compute the information'; + + +-- +-- Name: content_fossology_license; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.content_fossology_license ( + id public.sha1 NOT NULL, + license_id smallint NOT NULL, + indexer_configuration_id bigint NOT NULL +); + + +-- +-- Name: TABLE content_fossology_license; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TABLE public.content_fossology_license IS 'license associated to a raw content'; + + +-- +-- Name: COLUMN content_fossology_license.id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_fossology_license.id IS 'Raw content identifier'; + + +-- +-- Name: COLUMN content_fossology_license.license_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_fossology_license.license_id IS 'One of the content''s license identifier'; + + +-- +-- Name: COLUMN 
content_fossology_license.indexer_configuration_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_fossology_license.indexer_configuration_id IS 'Tool used to compute the information'; + + +-- +-- Name: content_fossology_license_license_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.content_fossology_license_license_id_seq + AS smallint + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: content_fossology_license_license_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.content_fossology_license_license_id_seq OWNED BY public.content_fossology_license.license_id; + + +-- +-- Name: content_language; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.content_language ( + id public.sha1 NOT NULL, + lang public.languages NOT NULL, + indexer_configuration_id bigint NOT NULL +); + + +-- +-- Name: TABLE content_language; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TABLE public.content_language IS 'Language information on a raw content'; + + +-- +-- Name: COLUMN content_language.lang; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_language.lang IS 'Language information'; + + +-- +-- Name: COLUMN content_language.indexer_configuration_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_language.indexer_configuration_id IS 'Tool used to compute the information'; + + +-- +-- Name: content_metadata; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.content_metadata ( + id public.sha1 NOT NULL, + translated_metadata jsonb NOT NULL, + indexer_configuration_id bigint NOT NULL +); + + +-- +-- Name: TABLE content_metadata; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TABLE public.content_metadata IS 'metadata semantically translated from a content file'; + + +-- +-- Name: COLUMN content_metadata.id; Type: COMMENT; Schema: 
public; Owner: - +-- + +COMMENT ON COLUMN public.content_metadata.id IS 'sha1 of content file'; + + +-- +-- Name: COLUMN content_metadata.translated_metadata; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_metadata.translated_metadata IS 'result of translation with defined format'; + + +-- +-- Name: COLUMN content_metadata.indexer_configuration_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_metadata.indexer_configuration_id IS 'tool used for translation'; + + +-- +-- Name: content_mimetype; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.content_mimetype ( + id public.sha1 NOT NULL, + mimetype bytea NOT NULL, + encoding bytea NOT NULL, + indexer_configuration_id bigint NOT NULL +); + + +-- +-- Name: TABLE content_mimetype; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TABLE public.content_mimetype IS 'Metadata associated to a raw content'; + + +-- +-- Name: COLUMN content_mimetype.mimetype; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_mimetype.mimetype IS 'Raw content Mimetype'; + + +-- +-- Name: COLUMN content_mimetype.encoding; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_mimetype.encoding IS 'Raw content encoding'; + + +-- +-- Name: COLUMN content_mimetype.indexer_configuration_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.content_mimetype.indexer_configuration_id IS 'Tool used to compute the information'; + + +-- +-- Name: dbversion; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.dbversion ( + version integer NOT NULL, + release timestamp with time zone, + description text +); + + +-- +-- Name: fossology_license; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.fossology_license ( + id smallint NOT NULL, + name text NOT NULL +); + + +-- +-- Name: TABLE fossology_license; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON 
TABLE public.fossology_license IS 'Possible license recognized by license indexer'; + + +-- +-- Name: COLUMN fossology_license.id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.fossology_license.id IS 'License identifier'; + + +-- +-- Name: COLUMN fossology_license.name; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.fossology_license.name IS 'License name'; + + +-- +-- Name: fossology_license_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.fossology_license_id_seq + AS smallint + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: fossology_license_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.fossology_license_id_seq OWNED BY public.fossology_license.id; + + +-- +-- Name: indexer_configuration_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.indexer_configuration_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: indexer_configuration_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.indexer_configuration_id_seq OWNED BY public.indexer_configuration.id; + + +-- +-- Name: origin_metadata_translation; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.origin_metadata_translation ( + id bigint NOT NULL, + result jsonb, + tool_id bigint +); + + +-- +-- Name: TABLE origin_metadata_translation; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TABLE public.origin_metadata_translation IS 'keeps translated for an origin_metadata entry'; + + +-- +-- Name: COLUMN origin_metadata_translation.id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.origin_metadata_translation.id IS 'the entry id in origin_metadata'; + + +-- +-- Name: COLUMN origin_metadata_translation.result; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN 
public.origin_metadata_translation.result IS 'translated_metadata result after translation with tool'; + + +-- +-- Name: COLUMN origin_metadata_translation.tool_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.origin_metadata_translation.tool_id IS 'tool used for translation'; + + +-- +-- Name: origin_metadata_translation_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.origin_metadata_translation_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: origin_metadata_translation_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.origin_metadata_translation_id_seq OWNED BY public.origin_metadata_translation.id; + + +-- +-- Name: revision_metadata; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.revision_metadata ( + id public.sha1_git NOT NULL, + translated_metadata jsonb NOT NULL, + indexer_configuration_id bigint NOT NULL +); + + +-- +-- Name: TABLE revision_metadata; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON TABLE public.revision_metadata IS 'metadata semantically detected and translated in a revision'; + + +-- +-- Name: COLUMN revision_metadata.id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.revision_metadata.id IS 'sha1_git of revision'; + + +-- +-- Name: COLUMN revision_metadata.translated_metadata; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.revision_metadata.translated_metadata IS 'result of detection and translation with defined format'; + + +-- +-- Name: COLUMN revision_metadata.indexer_configuration_id; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON COLUMN public.revision_metadata.indexer_configuration_id IS 'tool used for detection'; + + +-- +-- Name: content_fossology_license license_id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_fossology_license ALTER COLUMN license_id SET DEFAULT 
nextval('public.content_fossology_license_license_id_seq'::regclass); + + +-- +-- Name: fossology_license id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.fossology_license ALTER COLUMN id SET DEFAULT nextval('public.fossology_license_id_seq'::regclass); + + +-- +-- Name: indexer_configuration id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.indexer_configuration ALTER COLUMN id SET DEFAULT nextval('public.indexer_configuration_id_seq'::regclass); + + +-- +-- Name: origin_metadata_translation id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.origin_metadata_translation ALTER COLUMN id SET DEFAULT nextval('public.origin_metadata_translation_id_seq'::regclass); + + +-- +-- Data for Name: content_ctags; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.content_ctags (id, name, kind, line, lang, indexer_configuration_id) FROM stdin; +\. + + +-- +-- Data for Name: content_fossology_license; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.content_fossology_license (id, license_id, indexer_configuration_id) FROM stdin; +\. + + +-- +-- Data for Name: content_language; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.content_language (id, lang, indexer_configuration_id) FROM stdin; +\. + + +-- +-- Data for Name: content_metadata; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.content_metadata (id, translated_metadata, indexer_configuration_id) FROM stdin; +\. + + +-- +-- Data for Name: content_mimetype; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.content_mimetype (id, mimetype, encoding, indexer_configuration_id) FROM stdin; +\. + + +-- +-- Data for Name: dbversion; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.dbversion (version, release, description) FROM stdin; +115 2018-06-22 18:02:38.144382+02 Work In Progress +\. 
+ + +-- +-- Data for Name: fossology_license; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.fossology_license (id, name) FROM stdin; +\. + + +-- +-- Data for Name: indexer_configuration; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.indexer_configuration (id, tool_name, tool_version, tool_configuration) FROM stdin; +1 nomos 3.1.0rc2-31-ga2cbb8c {"command_line": "nomossa "} +2 file 5.22 {"command_line": "file --mime "} +3 universal-ctags ~git7859817b {"command_line": "ctags --fields=+lnz --sort=no --links=no --output-format=json "} +4 pygments 2.0.1+dfsg-1.1+deb8u1 {"type": "library", "debian-package": "python3-pygments"} +5 pygments 2.0.1+dfsg-1.1+deb8u1 {"type": "library", "debian-package": "python3-pygments", "max_content_size": 10240} +6 swh-metadata-translator 0.0.1 {"type": "local", "context": "npm"} +7 swh-metadata-detector 0.0.1 {"type": "local", "context": ["npm", "codemeta"]} +8 swh-deposit 0.0.1 {"sword_version": "2"} +9 file 1:5.30-1+deb9u1 {"type": "library", "debian-package": "python3-magic"} +\. + + +-- +-- Data for Name: origin_metadata_translation; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.origin_metadata_translation (id, result, tool_id) FROM stdin; +\. + + +-- +-- Data for Name: revision_metadata; Type: TABLE DATA; Schema: public; Owner: - +-- + +COPY public.revision_metadata (id, translated_metadata, indexer_configuration_id) FROM stdin; +\. 
+ + +-- +-- Name: content_fossology_license_license_id_seq; Type: SEQUENCE SET; Schema: public; Owner: - +-- + +SELECT pg_catalog.setval('public.content_fossology_license_license_id_seq', 1, false); + + +-- +-- Name: fossology_license_id_seq; Type: SEQUENCE SET; Schema: public; Owner: - +-- + +SELECT pg_catalog.setval('public.fossology_license_id_seq', 1, false); + + +-- +-- Name: indexer_configuration_id_seq; Type: SEQUENCE SET; Schema: public; Owner: - +-- + +SELECT pg_catalog.setval('public.indexer_configuration_id_seq', 9, true); + + +-- +-- Name: origin_metadata_translation_id_seq; Type: SEQUENCE SET; Schema: public; Owner: - +-- + +SELECT pg_catalog.setval('public.origin_metadata_translation_id_seq', 1, false); + + +-- +-- Name: content_fossology_license content_fossology_license_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_fossology_license + ADD CONSTRAINT content_fossology_license_pkey PRIMARY KEY (id, license_id, indexer_configuration_id); + + +-- +-- Name: content_language content_language_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_language + ADD CONSTRAINT content_language_pkey PRIMARY KEY (id, indexer_configuration_id); + + +-- +-- Name: content_metadata content_metadata_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_metadata + ADD CONSTRAINT content_metadata_pkey PRIMARY KEY (id, indexer_configuration_id); + + +-- +-- Name: content_mimetype content_mimetype_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_mimetype + ADD CONSTRAINT content_mimetype_pkey PRIMARY KEY (id, indexer_configuration_id); + + +-- +-- Name: dbversion dbversion_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.dbversion + ADD CONSTRAINT dbversion_pkey PRIMARY KEY (version); + + +-- +-- Name: fossology_license fossology_license_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + 
+ALTER TABLE ONLY public.fossology_license + ADD CONSTRAINT fossology_license_pkey PRIMARY KEY (id); + + +-- +-- Name: indexer_configuration indexer_configuration_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.indexer_configuration + ADD CONSTRAINT indexer_configuration_pkey PRIMARY KEY (id); + + +-- +-- Name: revision_metadata revision_metadata_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.revision_metadata + ADD CONSTRAINT revision_metadata_pkey PRIMARY KEY (id, indexer_configuration_id); + + +-- +-- Name: content_ctags_hash_sha1_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX content_ctags_hash_sha1_idx ON public.content_ctags USING btree (public.hash_sha1(name)); + + +-- +-- Name: content_ctags_id_hash_sha1_kind_line_lang_indexer_configura_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX content_ctags_id_hash_sha1_kind_line_lang_indexer_configura_idx ON public.content_ctags USING btree (id, public.hash_sha1(name), kind, line, lang, indexer_configuration_id); + + +-- +-- Name: content_ctags_id_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX content_ctags_id_idx ON public.content_ctags USING btree (id); + + +-- +-- Name: fossology_license_name_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX fossology_license_name_idx ON public.fossology_license USING btree (name); + + +-- +-- Name: indexer_configuration_tool_name_tool_version_tool_configura_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX indexer_configuration_tool_name_tool_version_tool_configura_idx ON public.indexer_configuration USING btree (tool_name, tool_version, tool_configuration); + + +-- +-- Name: content_ctags content_ctags_indexer_configuration_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_ctags + ADD CONSTRAINT content_ctags_indexer_configuration_id_fkey FOREIGN KEY (indexer_configuration_id) 
REFERENCES public.indexer_configuration(id); + + +-- +-- Name: content_fossology_license content_fossology_license_indexer_configuration_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_fossology_license + ADD CONSTRAINT content_fossology_license_indexer_configuration_id_fkey FOREIGN KEY (indexer_configuration_id) REFERENCES public.indexer_configuration(id); + + +-- +-- Name: content_fossology_license content_fossology_license_license_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_fossology_license + ADD CONSTRAINT content_fossology_license_license_id_fkey FOREIGN KEY (license_id) REFERENCES public.fossology_license(id); + + +-- +-- Name: content_language content_language_indexer_configuration_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_language + ADD CONSTRAINT content_language_indexer_configuration_id_fkey FOREIGN KEY (indexer_configuration_id) REFERENCES public.indexer_configuration(id); + + +-- +-- Name: content_metadata content_metadata_indexer_configuration_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_metadata + ADD CONSTRAINT content_metadata_indexer_configuration_id_fkey FOREIGN KEY (indexer_configuration_id) REFERENCES public.indexer_configuration(id); + + +-- +-- Name: content_mimetype content_mimetype_indexer_configuration_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.content_mimetype + ADD CONSTRAINT content_mimetype_indexer_configuration_id_fkey FOREIGN KEY (indexer_configuration_id) REFERENCES public.indexer_configuration(id); + + +-- +-- Name: revision_metadata revision_metadata_indexer_configuration_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.revision_metadata + ADD CONSTRAINT revision_metadata_indexer_configuration_id_fkey FOREIGN KEY (indexer_configuration_id) REFERENCES 
public.indexer_configuration(id); + + +-- +-- PostgreSQL database dump complete +-- + diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py index 7b97b61..470c470 100644 --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -1,1487 +1,1484 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pathlib import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.model.hashutil import hash_to_bytes from swh.indexer.storage import get_indexer_storage from swh.core.tests.db_testing import DbTestFixture - - -PATH_TO_STORAGE_TEST_DATA = '../../../../../swh-storage-testdata' +from swh.indexer.tests import DATA_DIR class StorageTestFixture: """Mix this in a test subject class to get Storage testing support. This fixture requires to come before DbTestFixture in the inheritance list as it uses its methods to setup its own internal database. Usage example: class TestStorage(StorageTestFixture, DbTestFixture): ... 
""" TEST_STORAGE_DB_NAME = 'softwareheritage-test-indexer' @classmethod def setUpClass(cls): if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'): raise RuntimeError("StorageTestFixture needs to be followed by " "DbTestFixture in the inheritance list.") - test_dir = pathlib.Path(__file__).absolute().parent - test_data_dir = test_dir / PATH_TO_STORAGE_TEST_DATA - test_db_dump = (test_data_dir / 'dumps/swh-indexer.dump').absolute() - cls.add_db(cls.TEST_STORAGE_DB_NAME, str(test_db_dump), 'pg_dump') + test_data_dir = pathlib.Path(DATA_DIR).absolute() + test_db_dump = test_data_dir / 'dumps/swh-indexer.sql' + cls.add_db(cls.TEST_STORAGE_DB_NAME, str(test_db_dump), 'psql') super().setUpClass() def setUp(self): super().setUp() self.storage_config = { 'cls': 'local', 'args': { 'db': 'dbname=%s' % self.TEST_STORAGE_DB_NAME, }, } self.storage = get_indexer_storage(**self.storage_config) def tearDown(self): self.storage = None super().tearDown() def reset_storage_tables(self): excluded = {'indexer_configuration'} self.reset_db_tables(self.TEST_STORAGE_DB_NAME, excluded=excluded) db = self.test_db[self.TEST_STORAGE_DB_NAME] db.conn.commit() @attr('db') class BaseTestStorage(StorageTestFixture, DbTestFixture): def setUp(self): super().setUp() self.sha1_1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') self.sha1_2 = hash_to_bytes('61c2b3a30496d329e21af70dd2d7e097046d07b7') self.revision_id_1 = hash_to_bytes( '7026b7c1a2af56521e951c01ed20f255fa054238') self.revision_id_2 = hash_to_bytes( '7026b7c1a2af56521e9587659012345678904321') cur = self.test_db[self.TEST_STORAGE_DB_NAME].cursor tools = {} cur.execute(''' select tool_name, id, tool_version, tool_configuration from indexer_configuration order by id''') for row in cur.fetchall(): key = row[0] while key in tools: key = '_' + key tools[key] = { 'id': row[1], 'name': row[0], 'version': row[2], 'configuration': row[3] } self.tools = tools def tearDown(self): self.reset_storage_tables() super().tearDown() @attr('db') class 
CommonTestStorage(BaseTestStorage): """Base class for Indexer Storage testing. """ @istest def check_config(self): self.assertTrue(self.storage.check_config(check_write=True)) self.assertTrue(self.storage.check_config(check_write=False)) @istest def content_mimetype_missing(self): # given tool_id = self.tools['file']['id'] mimetypes = [ { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }] # when actual_missing = self.storage.content_mimetype_missing(mimetypes) # then self.assertEqual(list(actual_missing), [ self.sha1_1, self.sha1_2, ]) # given self.storage.content_mimetype_add([{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, }]) # when actual_missing = self.storage.content_mimetype_missing(mimetypes) # then self.assertEqual(list(actual_missing), [self.sha1_1]) @istest def content_mimetype_add__drop_duplicate(self): # given tool_id = self.tools['file']['id'] mimetype_v1 = { 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, } # given self.storage.content_mimetype_add([mimetype_v1]) # when actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) # then expected_mimetypes_v1 = [{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'tool': self.tools['file'], }] self.assertEqual(actual_mimetypes, expected_mimetypes_v1) # given mimetype_v2 = mimetype_v1.copy() mimetype_v2.update({ 'mimetype': b'text/html', 'encoding': b'us-ascii', }) self.storage.content_mimetype_add([mimetype_v2]) actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) # mimetype did not change as the v2 was dropped. 
self.assertEqual(actual_mimetypes, expected_mimetypes_v1) @istest def content_mimetype_add__update_in_place_duplicate(self): # given tool_id = self.tools['file']['id'] mimetype_v1 = { 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, } # given self.storage.content_mimetype_add([mimetype_v1]) # when actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) expected_mimetypes_v1 = [{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'tool': self.tools['file'], }] # then self.assertEqual(actual_mimetypes, expected_mimetypes_v1) # given mimetype_v2 = mimetype_v1.copy() mimetype_v2.update({ 'mimetype': b'text/html', 'encoding': b'us-ascii', }) self.storage.content_mimetype_add([mimetype_v2], conflict_update=True) actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) expected_mimetypes_v2 = [{ 'id': self.sha1_2, 'mimetype': b'text/html', 'encoding': b'us-ascii', 'tool': { 'id': 2, 'name': 'file', 'version': '5.22', 'configuration': {'command_line': 'file --mime '} } }] # mimetype did change as the v2 was used to overwrite v1 self.assertEqual(actual_mimetypes, expected_mimetypes_v2) @istest def content_mimetype_get(self): # given tool_id = self.tools['file']['id'] mimetypes = [self.sha1_2, self.sha1_1] mimetype1 = { 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, } # when self.storage.content_mimetype_add([mimetype1]) # then actual_mimetypes = list(self.storage.content_mimetype_get(mimetypes)) # then expected_mimetypes = [{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'tool': self.tools['file'] }] self.assertEqual(actual_mimetypes, expected_mimetypes) @istest def content_language_missing(self): # given tool_id = self.tools['pygments']['id'] languages = [ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, } ] # when 
actual_missing = list(self.storage.content_language_missing(languages)) # then self.assertEqual(list(actual_missing), [ self.sha1_2, self.sha1_1, ]) # given self.storage.content_language_add([{ 'id': self.sha1_2, 'lang': 'haskell', 'indexer_configuration_id': tool_id, }]) # when actual_missing = list(self.storage.content_language_missing(languages)) # then self.assertEqual(actual_missing, [self.sha1_1]) @istest def content_language_get(self): # given tool_id = self.tools['pygments']['id'] language1 = { 'id': self.sha1_2, 'lang': 'common-lisp', 'indexer_configuration_id': tool_id, } # when self.storage.content_language_add([language1]) # then actual_languages = list(self.storage.content_language_get( [self.sha1_2, self.sha1_1])) # then expected_languages = [{ 'id': self.sha1_2, 'lang': 'common-lisp', 'tool': self.tools['pygments'] }] self.assertEqual(actual_languages, expected_languages) @istest def content_language_add__drop_duplicate(self): # given tool_id = self.tools['pygments']['id'] language_v1 = { 'id': self.sha1_2, 'lang': 'emacslisp', 'indexer_configuration_id': tool_id, } # given self.storage.content_language_add([language_v1]) # when actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # then expected_languages_v1 = [{ 'id': self.sha1_2, 'lang': 'emacslisp', 'tool': self.tools['pygments'] }] self.assertEqual(actual_languages, expected_languages_v1) # given language_v2 = language_v1.copy() language_v2.update({ 'lang': 'common-lisp', }) self.storage.content_language_add([language_v2]) actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # language did not change as the v2 was dropped. 
self.assertEqual(actual_languages, expected_languages_v1) @istest def content_language_add__update_in_place_duplicate(self): # given tool_id = self.tools['pygments']['id'] language_v1 = { 'id': self.sha1_2, 'lang': 'common-lisp', 'indexer_configuration_id': tool_id, } # given self.storage.content_language_add([language_v1]) # when actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # then expected_languages_v1 = [{ 'id': self.sha1_2, 'lang': 'common-lisp', 'tool': self.tools['pygments'] }] self.assertEqual(actual_languages, expected_languages_v1) # given language_v2 = language_v1.copy() language_v2.update({ 'lang': 'emacslisp', }) self.storage.content_language_add([language_v2], conflict_update=True) actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # then expected_languages_v2 = [{ 'id': self.sha1_2, 'lang': 'emacslisp', 'tool': self.tools['pygments'] }] # language did change as the v2 was used to overwrite v1 self.assertEqual(actual_languages, expected_languages_v2) @istest def content_ctags_missing(self): # given tool_id = self.tools['universal-ctags']['id'] ctags = [ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, } ] # when actual_missing = self.storage.content_ctags_missing(ctags) # then self.assertEqual(list(actual_missing), [ self.sha1_2, self.sha1_1 ]) # given self.storage.content_ctags_add([ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 119, 'lang': 'OCaml', }] }, ]) # when actual_missing = self.storage.content_ctags_missing(ctags) # then self.assertEqual(list(actual_missing), [self.sha1_1]) @istest def content_ctags_get(self): # given tool_id = self.tools['universal-ctags']['id'] ctags = [self.sha1_2, self.sha1_1] ctag1 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'done', 'kind': 'variable', 
'line': 100, 'lang': 'Python', }, { 'name': 'main', 'kind': 'function', 'line': 119, 'lang': 'Python', }] } # when self.storage.content_ctags_add([ctag1]) # then actual_ctags = list(self.storage.content_ctags_get(ctags)) # then expected_ctags = [ { 'id': self.sha1_2, 'tool': self.tools['universal-ctags'], 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Python', }, { 'id': self.sha1_2, 'tool': self.tools['universal-ctags'], 'name': 'main', 'kind': 'function', 'line': 119, 'lang': 'Python', } ] self.assertEqual(actual_ctags, expected_ctags) @istest def content_ctags_search(self): # 1. given tool = self.tools['universal-ctags'] tool_id = tool['id'] ctag1 = { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', }, { 'name': 'counter', 'kind': 'variable', 'line': 119, 'lang': 'Python', }, ] } ctag2 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', }, ] } self.storage.content_ctags_add([ctag1, ctag2]) # 1. when actual_ctags = list(self.storage.content_ctags_search('hello', limit=1)) # 1. then self.assertEqual(actual_ctags, [ { 'id': ctag1['id'], 'tool': tool, 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', } ]) # 2. when actual_ctags = list(self.storage.content_ctags_search( 'hello', limit=1, last_sha1=ctag1['id'])) # 2. then self.assertEqual(actual_ctags, [ { 'id': ctag2['id'], 'tool': tool, 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', } ]) # 3. when actual_ctags = list(self.storage.content_ctags_search('hello')) # 3. then self.assertEqual(actual_ctags, [ { 'id': ctag1['id'], 'tool': tool, 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', }, { 'id': ctag2['id'], 'tool': tool, 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', }, ]) # 4. 
when actual_ctags = list(self.storage.content_ctags_search('counter')) # then self.assertEqual(actual_ctags, [{ 'id': ctag1['id'], 'tool': tool, 'name': 'counter', 'kind': 'variable', 'line': 119, 'lang': 'Python', }]) @istest def content_ctags_search_no_result(self): actual_ctags = list(self.storage.content_ctags_search('counter')) self.assertEquals(actual_ctags, []) @istest def content_ctags_add__add_new_ctags_added(self): # given tool = self.tools['universal-ctags'] tool_id = tool['id'] ctag_v1 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }] } # given self.storage.content_ctags_add([ctag_v1]) self.storage.content_ctags_add([ctag_v1]) # conflict does nothing # when actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) # then expected_ctags = [{ 'id': self.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }] self.assertEqual(actual_ctags, expected_ctags) # given ctag_v2 = ctag_v1.copy() ctag_v2.update({ 'ctags': [ { 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', } ] }) self.storage.content_ctags_add([ctag_v2]) expected_ctags = [ { 'id': self.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }, { 'id': self.sha1_2, 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', 'tool': tool, } ] actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) self.assertEqual(actual_ctags, expected_ctags) @istest def content_ctags_add__update_in_place(self): # given tool = self.tools['universal-ctags'] tool_id = tool['id'] ctag_v1 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }] } # given self.storage.content_ctags_add([ctag_v1]) # when actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) # then expected_ctags = [ { 'id': self.sha1_2, 'name': 'done', 
'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool } ] self.assertEqual(actual_ctags, expected_ctags) # given ctag_v2 = ctag_v1.copy() ctag_v2.update({ 'ctags': [ { 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }, { 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', } ] }) self.storage.content_ctags_add([ctag_v2], conflict_update=True) actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) # ctag did change as the v2 was used to overwrite v1 expected_ctags = [ { 'id': self.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }, { 'id': self.sha1_2, 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', 'tool': tool, } ] self.assertEqual(actual_ctags, expected_ctags) @istest def content_fossology_license_get(self): # given tool = self.tools['nomos'] tool_id = tool['id'] license1 = { 'id': self.sha1_1, 'licenses': ['GPL-2.0+'], 'indexer_configuration_id': tool_id, } # when self.storage.content_fossology_license_add([license1]) # then actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_2, self.sha1_1])) expected_license = { self.sha1_1: [{ 'licenses': ['GPL-2.0+'], 'tool': tool, }] } # then self.assertEqual(actual_licenses, [expected_license]) @istest def content_fossology_license_add__new_license_added(self): # given tool = self.tools['nomos'] tool_id = tool['id'] license_v1 = { 'id': self.sha1_1, 'licenses': ['Apache-2.0'], 'indexer_configuration_id': tool_id, } # given self.storage.content_fossology_license_add([license_v1]) # conflict does nothing self.storage.content_fossology_license_add([license_v1]) # when actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) # then expected_license = { self.sha1_1: [{ 'licenses': ['Apache-2.0'], 'tool': tool, }] } self.assertEqual(actual_licenses, [expected_license]) # given license_v2 = license_v1.copy() license_v2.update({ 'licenses': ['BSD-2-Clause'], }) 
self.storage.content_fossology_license_add([license_v2]) actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) expected_license = { self.sha1_1: [{ 'licenses': ['Apache-2.0', 'BSD-2-Clause'], 'tool': tool }] } # the v2 license was added next to the v1 one (nothing was dropped) self.assertEqual(actual_licenses, [expected_license]) @istest def content_fossology_license_add__update_in_place_duplicate(self): # given tool = self.tools['nomos'] tool_id = tool['id'] license_v1 = { 'id': self.sha1_1, 'licenses': ['CECILL'], 'indexer_configuration_id': tool_id, } # given self.storage.content_fossology_license_add([license_v1]) # conflict does nothing self.storage.content_fossology_license_add([license_v1]) # when actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) # then expected_license = { self.sha1_1: [{ 'licenses': ['CECILL'], 'tool': tool, }] } self.assertEqual(actual_licenses, [expected_license]) # given license_v2 = license_v1.copy() license_v2.update({ 'licenses': ['CECILL-2.0'] }) self.storage.content_fossology_license_add([license_v2], conflict_update=True) actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) # license did change as the v2 was used to overwrite v1 expected_license = { self.sha1_1: [{ 'licenses': ['CECILL-2.0'], 'tool': tool, }] } self.assertEqual(actual_licenses, [expected_license]) @istest def content_metadata_missing(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata = [ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, } ] # when actual_missing = list(self.storage.content_metadata_missing(metadata)) # then self.assertEqual(list(actual_missing), [ self.sha1_2, self.sha1_1, ]) # given self.storage.content_metadata_add([{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 
'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id }]) # when actual_missing = list(self.storage.content_metadata_missing(metadata)) # then self.assertEqual(actual_missing, [self.sha1_1]) @istest def content_metadata_get(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata1 = { 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id, } # when self.storage.content_metadata_add([metadata1]) # then actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2, self.sha1_1])) expected_metadata = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] self.assertEqual(actual_metadata, expected_metadata) @istest def content_metadata_add_drop_duplicate(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata_v1 = { 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id, } # given self.storage.content_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) expected_metadata_v1 = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'other': {}, 'name': 'test_drop_duplicated_metadata', 'version': '0.0.1' }, }) 
self.storage.content_metadata_add([metadata_v2]) # then actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) # metadata did not change as the v2 was dropped. self.assertEqual(actual_metadata, expected_metadata_v1) @istest def content_metadata_add_update_in_place_duplicate(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata_v1 = { 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id, } # given self.storage.content_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) # then expected_metadata_v1 = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'other': {}, 'name': 'test_update_duplicated_metadata', 'version': '0.0.1' }, }) self.storage.content_metadata_add([metadata_v2], conflict_update=True) actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) # then 
expected_metadata_v2 = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_update_duplicated_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] # metadata did change as the v2 was used to overwrite v1 self.assertEqual(actual_metadata, expected_metadata_v2) @istest def revision_metadata_missing(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata = [ { 'id': self.revision_id_1, 'indexer_configuration_id': tool_id, }, { 'id': self.revision_id_2, 'indexer_configuration_id': tool_id, } ] # when actual_missing = list(self.storage.revision_metadata_missing( metadata)) # then self.assertEqual(list(actual_missing), [ self.revision_id_1, self.revision_id_2, ]) # given self.storage.revision_metadata_add([{ 'id': self.revision_id_1, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id }]) # when actual_missing = list(self.storage.revision_metadata_missing( metadata)) # then self.assertEqual(actual_missing, [self.revision_id_2]) @istest def revision_metadata_get(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata_rev = { 'id': self.revision_id_2, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id } # when self.storage.revision_metadata_add([metadata_rev]) # then actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_2, 
self.revision_id_1])) expected_metadata = [{ 'id': self.revision_id_2, 'translated_metadata': metadata_rev['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] self.assertEqual(actual_metadata, expected_metadata) @istest def revision_metadata_add_drop_duplicate(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata_v1 = { 'id': self.revision_id_1, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id, } # given self.storage.revision_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_1])) expected_metadata_v1 = [{ 'id': self.revision_id_1, 'translated_metadata': metadata_v1['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'name': 'test_metadata', 'author': 'MG', }, }) self.storage.revision_metadata_add([metadata_v2]) # then actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_1])) # metadata did not change as the v2 was dropped. 
self.assertEqual(actual_metadata, expected_metadata_v1) @istest def revision_metadata_add_update_in_place_duplicate(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata_v1 = { 'id': self.revision_id_2, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id, } # given self.storage.revision_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_2])) # then expected_metadata_v1 = [{ 'id': self.revision_id_2, 'translated_metadata': metadata_v1['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'name': 'test_update_duplicated_metadata', 'author': 'MG' }, }) self.storage.revision_metadata_add([metadata_v2], conflict_update=True) actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_2])) # language did not change as the v2 was dropped. 
expected_metadata_v2 = [{ 'id': self.revision_id_2, 'translated_metadata': metadata_v2['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] # metadata did change as the v2 was used to overwrite v1 self.assertEqual(actual_metadata, expected_metadata_v2) @istest def indexer_configuration_add(self): tool = { 'tool_name': 'some-unknown-tool', 'tool_version': 'some-version', 'tool_configuration': {"debian-package": "some-package"}, } actual_tool = self.storage.indexer_configuration_get(tool) self.assertIsNone(actual_tool) # does not exist # add it actual_tools = list(self.storage.indexer_configuration_add([tool])) self.assertEquals(len(actual_tools), 1) actual_tool = actual_tools[0] self.assertIsNotNone(actual_tool) # now it exists new_id = actual_tool.pop('id') self.assertEquals(actual_tool, tool) actual_tools2 = list(self.storage.indexer_configuration_add([tool])) actual_tool2 = actual_tools2[0] self.assertIsNotNone(actual_tool2) # now it exists new_id2 = actual_tool2.pop('id') self.assertEqual(new_id, new_id2) self.assertEqual(actual_tool, actual_tool2) @istest def indexer_configuration_add_multiple(self): tool = { 'tool_name': 'some-unknown-tool', 'tool_version': 'some-version', 'tool_configuration': {"debian-package": "some-package"}, } actual_tools = list(self.storage.indexer_configuration_add([tool])) self.assertEqual(len(actual_tools), 1) new_tools = [tool, { 'tool_name': 'yet-another-tool', 'tool_version': 'version', 'tool_configuration': {}, }] actual_tools = list(self.storage.indexer_configuration_add(new_tools)) self.assertEqual(len(actual_tools), 2) # order not guaranteed, so we iterate over results to check for tool in actual_tools: _id = tool.pop('id') self.assertIsNotNone(_id) self.assertIn(tool, new_tools) @istest def indexer_configuration_get_missing(self): tool = { 'tool_name': 'unknown-tool', 'tool_version': '3.1.0rc2-31-ga2cbb8c', 'tool_configuration': {"command_line": "nomossa "}, } actual_tool = 
self.storage.indexer_configuration_get(tool) self.assertIsNone(actual_tool) @istest def indexer_configuration_get(self): tool = { 'tool_name': 'nomos', 'tool_version': '3.1.0rc2-31-ga2cbb8c', 'tool_configuration': {"command_line": "nomossa "}, } actual_tool = self.storage.indexer_configuration_get(tool) expected_tool = tool.copy() expected_tool['id'] = 1 self.assertEqual(expected_tool, actual_tool) @istest def indexer_configuration_metadata_get_missing_context(self): tool = { 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': {"context": "unknown-context"}, } actual_tool = self.storage.indexer_configuration_get(tool) self.assertIsNone(actual_tool) @istest def indexer_configuration_metadata_get(self): tool = { 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': {"type": "local", "context": "npm"}, } actual_tool = self.storage.indexer_configuration_get(tool) expected_tool = tool.copy() expected_tool['id'] = actual_tool['id'] self.assertEqual(expected_tool, actual_tool) class IndexerTestStorage(CommonTestStorage, unittest.TestCase): """Running the tests locally. For the client api tests (remote storage), see `class`:swh.indexer.storage.test_api_client:TestRemoteStorage class. """ pass