diff --git a/.gitignore b/.gitignore index f5fc2ae..43c4b92 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ *.pyc *.sw? *~ .coverage .eggs/ __pycache__ *.egg-info/ -version.txt \ No newline at end of file +version.txt +/sql/createdb-stamp +/sql/filldb-stamp diff --git a/PKG-INFO b/PKG-INFO index 99942cf..2481b88 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.indexer -Version: 0.0.43 +Version: 0.0.44 Summary: Software Heritage Content Indexer Home-page: https://forge.softwareheritage.org/diffusion/78/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/rules b/debian/rules index 5383204..aa268db 100755 --- a/debian/rules +++ b/debian/rules @@ -1,11 +1,11 @@ #!/usr/bin/make -f export PYBUILD_NAME=swh.indexer -export PYBUILD_TEST_ARGS=--with-doctest -sv -a !db,!fs +export PYBUILD_TEST_ARGS=-sv -a !db,!fs %: dh $@ --with python3 --buildsystem=pybuild override_dh_install: dh_install rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py diff --git a/sql/Makefile b/sql/Makefile new file mode 100644 index 0000000..d52d181 --- /dev/null +++ b/sql/Makefile @@ -0,0 +1,43 @@ +# Depends: postgresql-client, postgresql-autodoc + +DBNAME = softwareheritage-indexer-dev +DOCDIR = autodoc + +SQL_INIT = swh-init.sql +SQL_ENUMS = swh-enums.sql +SQL_SCHEMA = swh-schema.sql +SQL_FUNC = swh-func.sql +SQL_DATA = swh-data.sql +SQL_INDEX = swh-indexes.sql +SQLS = $(SQL_INIT) $(SQL_ENUMS) $(SQL_SCHEMA) $(SQL_FUNC) $(SQL_INDEX) $(SQL_DATA) + +PSQL_BIN = psql +PSQL_FLAGS = --echo-all -X -v ON_ERROR_STOP=1 +PSQL = $(PSQL_BIN) $(PSQL_FLAGS) + +all: + +createdb: createdb-stamp +createdb-stamp: $(SQL_INIT) + createdb $(DBNAME) + touch $@ + +filldb: filldb-stamp +filldb-stamp: createdb-stamp + cat $(SQLS) | $(PSQL) $(DBNAME) + touch $@ + +dropdb: + -dropdb $(DBNAME) + +dumpdb: swh-indexer.dump +swh-indexer.dump: filldb-stamp + pg_dump -Fc $(DBNAME) > $@ + +clean: + rm -rf *-stamp $(DOCDIR)/ + +distclean: clean dropdb + rm -f swh-indexer.dump + +.PHONY: all initdb createdb dropdb doc clean diff --git a/sql/bin/db-upgrade b/sql/bin/db-upgrade new file mode 100755 index 0000000..1dd4e2b --- /dev/null +++ b/sql/bin/db-upgrade @@ -0,0 +1,73 @@ +#!/bin/bash + +# Compute a draft upgrade script for the DB schema, based on Git revisions. + +# Depends: apgdiff + +set -e + +SQLS="swh-*.sql" +VERSION_SQL="swh-schema.sql" +UPGRADE_DIR="upgrades" +DB_NAME="softwareheritage-dev" + +usage () { + echo "Usage: db-upgrade GIT_REV_FROM [GIT_REV_TO]" + echo "Example: db-upgrade HEAD^" + echo " db-upgrade HEAD~4 HEAD~2" + echo "See also: gitrevisions(7)" + exit 1 +} + +pg_dump_revision () { + rev="$1" + dump="$2" + + echo "checking out revision $rev, and dumping DB at the time..." 
+ if [ "$rev" != "HEAD" ] ; then + git checkout --quiet "$rev" + fi + make distclean filldb > /dev/null + pg_dump "$DB_NAME" > "$dump" + if [ "$rev" != "HEAD" ] ; then + git checkout --quiet - + fi +} + +# argument parsing +if [ -z "$1" ] ; then + usage +fi +from_rev="$1" +shift 1 +if [ -z "$1" ] ; then + to_rev="HEAD" +else + to_rev="$1" + shift 1 +fi + +old_dump=$(mktemp tmp.swh-db-upgrade.XXXXXXXXXX) +new_dump=$(mktemp tmp.swh-db-upgrade.XXXXXXXXXX) +trap "rm -f $old_dump $new_dump" EXIT + +schema_version=$(grep -i -A 1 '^insert into dbversion' "$VERSION_SQL" | tail -n 1 \ + | sed -e 's/.*values(//i' -e 's/,.*//') +upgrade_script=$(mktemp -p "$UPGRADE_DIR" $(printf '%.03d' ${schema_version}).XXXX.sql) +pg_dump_revision "$from_rev" "$old_dump" +pg_dump_revision "$to_rev" "$new_dump" + +cat > "$upgrade_script" <> "$upgrade_script" + +echo "all done." +echo "Draft upgrade script is at: ${upgrade_script}" diff --git a/sql/bin/dot_add_content b/sql/bin/dot_add_content new file mode 100755 index 0000000..fc24e38 --- /dev/null +++ b/sql/bin/dot_add_content @@ -0,0 +1,15 @@ +#!/bin/bash + +DOT_FILE="$1" +DOT_EXTRA="$2" +if [ -z "$DOT_FILE" -o -z "$DOT_EXTRA" ] ; then + echo "Usage: $0 DOT_FILE DOT_EXTRA" + exit 1 +fi + +schema_version=$(grep -i -A 1 '^insert into dbversion' swh-schema.sql | tail -n 1 \ + | sed -e 's/.*values(//i' -e 's/,.*//') + +head -n -1 "$DOT_FILE" # all of $DOT_FILE but last line +sed "s/@@VERSION@@/$schema_version/" "$DOT_EXTRA" +echo "}" diff --git a/sql/json/.gitignore b/sql/json/.gitignore new file mode 100644 index 0000000..c337aa9 --- /dev/null +++ b/sql/json/.gitignore @@ -0,0 +1 @@ +*-stamp diff --git a/sql/json/Makefile b/sql/json/Makefile new file mode 100644 index 0000000..5d983b8 --- /dev/null +++ b/sql/json/Makefile @@ -0,0 +1,19 @@ +# Depends: json-glib-tools + +JSONVAL = json-glib-validate +JSONS = $(wildcard *.json) + +all: validate +check: validate +test: validate + +validate: validate-stamp +validate-stamp: $(JSONS) + make $(patsubst %,validate/%,$?) 
+ touch $@ + +validate/%: + $(JSONVAL) $* + +clean: + rm -f validate-stamp diff --git a/sql/json/indexer_configuration.tool_configuration.schema.json b/sql/json/indexer_configuration.tool_configuration.schema.json new file mode 100644 index 0000000..28396b4 --- /dev/null +++ b/sql/json/indexer_configuration.tool_configuration.schema.json @@ -0,0 +1,11 @@ +{ + "$schema": "http://json-schema.org/schema#", + "id": "http://softwareheritage.org/schemas/indexer_configuration.tool_configuration.schema.json", + + "type": "object", + "properties": { + "command_line": { + "type": "string" + } + } +} diff --git a/sql/json/revision_metadata.translated_metadata.json b/sql/json/revision_metadata.translated_metadata.json new file mode 100644 index 0000000..1806fc7 --- /dev/null +++ b/sql/json/revision_metadata.translated_metadata.json @@ -0,0 +1,59 @@ +{ + "$schema": "http://json-schema.org/schema#", + "id": "http://softwareheritage.org/schemas/revision_metadata.translated_metadata.schema.json", + + "type": "object", + "properties": { + "developmentStatus": { + "type": "list" + }, + "version": { + "type": "list" + }, + "operatingSystem": { + "type": "list" + }, + "description": { + "type": "list" + }, + "keywords": { + "type": "list" + }, + "issueTracker": { + "type": "list" + }, + "name": { + "type": "list" + }, + "author": { + "type": "list" + }, + "relatedLink": { + "type": "list" + }, + "url": { + "type": "list" + }, + "type": { + "type": "list" + }, + "license": { + "type": "list" + }, + "maintainer": { + "type": "list" + }, + "email": { + "type": "list" + }, + "softwareRequirements": { + "type": "list" + }, + "identifier": { + "type": "list" + }, + "codeRepository": { + "type": "list" + }, + } +} diff --git a/sql/swh-data.sql b/sql/swh-data.sql new file mode 100644 index 0000000..e429343 --- /dev/null +++ b/sql/swh-data.sql @@ -0,0 +1,26 @@ +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('nomos', '3.1.0rc2-31-ga2cbb8c', '{"command_line": "nomossa "}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('file', '5.22', '{"command_line": "file --mime "}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('universal-ctags', '~git7859817b', '{"command_line": "ctags --fields=+lnz --sort=no --links=no --output-format=json "}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('pygments', '2.0.1+dfsg-1.1+deb8u1', '{"type": "library", "debian-package": "python3-pygments"}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('pygments', '2.0.1+dfsg-1.1+deb8u1', '{"type": "library", "debian-package": "python3-pygments", "max_content_size": 10240}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('swh-metadata-translator', '0.0.1', '{"type": "local", "context": "npm"}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('swh-metadata-detector', '0.0.1', '{"type": "local", "context": ["npm", "codemeta"]}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('swh-deposit', '0.0.1', '{"sword_version": "2"}'); + +insert into indexer_configuration(tool_name, tool_version, tool_configuration) +values ('file', '1:5.30-1+deb9u1', '{"type": "library", "debian-package": "python3-magic"}'); diff --git a/sql/swh-enums.sql b/sql/swh-enums.sql new file mode 100644 index 0000000..a357eb5 --- 
/dev/null +++ b/sql/swh-enums.sql @@ -0,0 +1,100 @@ +create type languages as enum ( 'abap', 'abnf', 'actionscript', + 'actionscript-3', 'ada', 'adl', 'agda', 'alloy', 'ambienttalk', + 'antlr', 'antlr-with-actionscript-target', 'antlr-with-c#-target', + 'antlr-with-cpp-target', 'antlr-with-java-target', + 'antlr-with-objectivec-target', 'antlr-with-perl-target', + 'antlr-with-python-target', 'antlr-with-ruby-target', 'apacheconf', + 'apl', 'applescript', 'arduino', 'aspectj', 'aspx-cs', 'aspx-vb', + 'asymptote', 'autohotkey', 'autoit', 'awk', 'base-makefile', 'bash', + 'bash-session', 'batchfile', 'bbcode', 'bc', 'befunge', + 'blitzbasic', 'blitzmax', 'bnf', 'boo', 'boogie', 'brainfuck', + 'bro', 'bugs', 'c', 'c#', 'c++', 'c-objdump', 'ca65-assembler', + 'cadl', 'camkes', 'cbm-basic-v2', 'ceylon', 'cfengine3', + 'cfstatement', 'chaiscript', 'chapel', 'cheetah', 'cirru', 'clay', + 'clojure', 'clojurescript', 'cmake', 'cobol', 'cobolfree', + 'coffeescript', 'coldfusion-cfc', 'coldfusion-html', 'common-lisp', + 'component-pascal', 'coq', 'cpp-objdump', 'cpsa', 'crmsh', 'croc', + 'cryptol', 'csound-document', 'csound-orchestra', 'csound-score', + 'css', 'css+django/jinja', 'css+genshi-text', 'css+lasso', + 'css+mako', 'css+mozpreproc', 'css+myghty', 'css+php', 'css+ruby', + 'css+smarty', 'cuda', 'cypher', 'cython', 'd', 'd-objdump', + 'darcs-patch', 'dart', 'debian-control-file', 'debian-sourcelist', + 'delphi', 'dg', 'diff', 'django/jinja', 'docker', 'dtd', 'duel', + 'dylan', 'dylan-session', 'dylanlid', 'earl-grey', 'easytrieve', + 'ebnf', 'ec', 'ecl', 'eiffel', 'elixir', 'elixir-iex-session', + 'elm', 'emacslisp', 'embedded-ragel', 'erb', 'erlang', + 'erlang-erl-session', 'evoque', 'ezhil', 'factor', 'fancy', + 'fantom', 'felix', 'fish', 'fortran', 'fortranfixed', 'foxpro', + 'fsharp', 'gap', 'gas', 'genshi', 'genshi-text', 'gettext-catalog', + 'gherkin', 'glsl', 'gnuplot', 'go', 'golo', 'gooddata-cl', 'gosu', + 'gosu-template', 'groff', 'groovy', 'haml', 'handlebars', 'haskell', + 'haxe', 'hexdump', 'html', 'html+cheetah', 'html+django/jinja', + 'html+evoque', 'html+genshi', 'html+handlebars', 'html+lasso', + 'html+mako', 'html+myghty', 'html+php', 'html+smarty', 'html+twig', + 'html+velocity', 'http', 'hxml', 'hy', 'hybris', 'idl', 'idris', + 'igor', 'inform-6', 'inform-6-template', 'inform-7', 'ini', 'io', + 'ioke', 'irc-logs', 'isabelle', 'j', 'jade', 'jags', 'jasmin', + 'java', 'java-server-page', 'javascript', 'javascript+cheetah', + 'javascript+django/jinja', 'javascript+genshi-text', + 'javascript+lasso', 'javascript+mako', 'javascript+mozpreproc', + 'javascript+myghty', 'javascript+php', 'javascript+ruby', + 'javascript+smarty', 'jcl', 'json', 'json-ld', 'julia', + 'julia-console', 'kal', 'kconfig', 'koka', 'kotlin', 'lasso', + 'lean', 'lesscss', 'lighttpd-configuration-file', 'limbo', 'liquid', + 'literate-agda', 'literate-cryptol', 'literate-haskell', + 'literate-idris', 'livescript', 'llvm', 'logos', 'logtalk', 'lsl', + 'lua', 'makefile', 'mako', 'maql', 'mask', 'mason', 'mathematica', + 'matlab', 'matlab-session', 'minid', 'modelica', 'modula-2', + 'moinmoin/trac-wiki-markup', 'monkey', 'moocode', 'moonscript', + 'mozhashpreproc', 'mozpercentpreproc', 'mql', 'mscgen', + 'msdos-session', 'mupad', 'mxml', 'myghty', 'mysql', 'nasm', + 'nemerle', 'nesc', 'newlisp', 'newspeak', + 'nginx-configuration-file', 'nimrod', 'nit', 'nix', 'nsis', 'numpy', + 'objdump', 'objdump-nasm', 'objective-c', 'objective-c++', + 'objective-j', 'ocaml', 'octave', 'odin', 'ooc', 'opa', + 'openedge-abl', 
'pacmanconf', 'pan', 'parasail', 'pawn', 'perl', + 'perl6', 'php', 'pig', 'pike', 'pkgconfig', 'pl/pgsql', + 'postgresql-console-(psql)', 'postgresql-sql-dialect', 'postscript', + 'povray', 'powershell', 'powershell-session', 'praat', 'prolog', + 'properties', 'protocol-buffer', 'puppet', 'pypy-log', 'python', + 'python-3', 'python-3.0-traceback', 'python-console-session', + 'python-traceback', 'qbasic', 'qml', 'qvto', 'racket', 'ragel', + 'ragel-in-c-host', 'ragel-in-cpp-host', 'ragel-in-d-host', + 'ragel-in-java-host', 'ragel-in-objective-c-host', + 'ragel-in-ruby-host', 'raw-token-data', 'rconsole', 'rd', 'rebol', + 'red', 'redcode', 'reg', 'resourcebundle', 'restructuredtext', + 'rexx', 'rhtml', 'roboconf-graph', 'roboconf-instances', + 'robotframework', 'rpmspec', 'rql', 'rsl', 'ruby', + 'ruby-irb-session', 'rust', 's', 'sass', 'scala', + 'scalate-server-page', 'scaml', 'scheme', 'scilab', 'scss', 'shen', + 'slim', 'smali', 'smalltalk', 'smarty', 'snobol', 'sourcepawn', + 'sparql', 'sql', 'sqlite3con', 'squidconf', 'stan', 'standard-ml', + 'supercollider', 'swift', 'swig', 'systemverilog', 'tads-3', 'tap', + 'tcl', 'tcsh', 'tcsh-session', 'tea', 'termcap', 'terminfo', + 'terraform', 'tex', 'text-only', 'thrift', 'todotxt', + 'trafficscript', 'treetop', 'turtle', 'twig', 'typescript', + 'urbiscript', 'vala', 'vb.net', 'vctreestatus', 'velocity', + 'verilog', 'vgl', 'vhdl', 'viml', 'x10', 'xml', 'xml+cheetah', + 'xml+django/jinja', 'xml+evoque', 'xml+lasso', 'xml+mako', + 'xml+myghty', 'xml+php', 'xml+ruby', 'xml+smarty', 'xml+velocity', + 'xquery', 'xslt', 'xtend', 'xul+mozpreproc', 'yaml', 'yaml+jinja', + 'zephir', 'unknown' +); +comment on type languages is 'Languages recognized by language indexer'; + +create type ctags_languages as enum ( 'Ada', 'AnsiblePlaybook', 'Ant', + 'Asm', 'Asp', 'Autoconf', 'Automake', 'Awk', 'Basic', 'BETA', 'C', + 'C#', 'C++', 'Clojure', 'Cobol', 'CoffeeScript [disabled]', 'CSS', + 'ctags', 'D', 'DBusIntrospect', 'Diff', 'DosBatch', 'DTS', 'Eiffel', + 'Erlang', 'Falcon', 'Flex', 'Fortran', 'gdbinit [disabled]', + 'Glade', 'Go', 'HTML', 'Iniconf', 'Java', 'JavaProperties', + 'JavaScript', 'JSON', 'Lisp', 'Lua', 'M4', 'Make', 'man [disabled]', + 'MatLab', 'Maven2', 'Myrddin', 'ObjectiveC', 'OCaml', 'OldC + [disabled]', 'OldC++ [disabled]', 'Pascal', 'Perl', 'Perl6', 'PHP', + 'PlistXML', 'pod', 'Protobuf', 'Python', 'PythonLoggingConfig', 'R', + 'RelaxNG', 'reStructuredText', 'REXX', 'RpmSpec', 'Ruby', 'Rust', + 'Scheme', 'Sh', 'SLang', 'SML', 'SQL', 'SVG', 'SystemdUnit', + 'SystemVerilog', 'Tcl', 'Tex', 'TTCN', 'Vera', 'Verilog', 'VHDL', + 'Vim', 'WindRes', 'XSLT', 'YACC', 'Yaml', 'YumRepo', 'Zephir' +); +comment on type ctags_languages is 'Languages recognized by ctags indexer'; diff --git a/sql/swh-func.sql b/sql/swh-func.sql new file mode 100644 index 0000000..62df8fa --- /dev/null +++ b/sql/swh-func.sql @@ -0,0 +1,721 @@ +-- create a temporary table with a single "bytea" column for fast object lookup. 
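The stored procedures added below in swh-func.sql all follow the bulk pattern spelled out in their comments: 0. create a temporary table, 1. COPY the inputs into it, 2. call the function. A minimal client-side sketch of that round trip with psycopg2 follows; the connection string and sha1 are illustrative, and a plain INSERT stands in for the COPY the real callers use:

import psycopg2

# Illustrative 20-byte content identifier.
sha1 = bytes.fromhex('34973274ccef6ab4dfaaf86599792fa9c3fe4689')

with psycopg2.connect('service=swh-indexer-dev') as conn:
    with conn.cursor() as cur:
        # 0. temporary lookup table, dropped at commit
        cur.execute('select swh_mktemp_bytea()')
        # 1. fill it (production code COPYs many ids at once)
        cur.execute('insert into tmp_bytea (id) values (%s)', (sha1,))
        # 2. call the set-returning function that joins on the temporary table
        cur.execute('select * from swh_content_mimetype_get()')
        print(cur.fetchall())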
+create or replace function swh_mktemp_bytea() + returns void + language sql +as $$ + create temporary table tmp_bytea ( + id bytea + ) on commit drop; +$$; + +-- create a temporary table called tmp_TBLNAME, mimicking existing table +-- TBLNAME +-- +-- Args: +-- tblname: name of the table to mimick +create or replace function swh_mktemp(tblname regclass) + returns void + language plpgsql +as $$ +begin + execute format(' + create temporary table tmp_%1$I + (like %1$I including defaults) + on commit drop; + alter table tmp_%1$I drop column if exists object_id; + ', tblname); + return; +end +$$; + +-- create a temporary table for content_ctags tmp_content_mimetype_missing, +create or replace function swh_mktemp_content_mimetype_missing() + returns void + language sql +as $$ + create temporary table tmp_content_mimetype_missing ( + id sha1, + indexer_configuration_id bigint + ) on commit drop; +$$; + +comment on function swh_mktemp_content_mimetype_missing() IS 'Helper table to filter existing mimetype information'; + +-- check which entries of tmp_bytea are missing from content_mimetype +-- +-- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea, +-- 2. call this function +create or replace function swh_content_mimetype_missing() + returns setof sha1 + language plpgsql +as $$ +begin + return query + (select id::sha1 from tmp_content_mimetype_missing as tmp + where not exists + (select 1 from content_mimetype as c + where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id)); + return; +end +$$; + +comment on function swh_content_mimetype_missing() is 'Filter existing mimetype information'; + +-- create a temporary table for content_mimetype tmp_content_mimetype, +create or replace function swh_mktemp_content_mimetype() + returns void + language sql +as $$ + create temporary table tmp_content_mimetype ( + like content_mimetype including defaults + ) on commit drop; +$$; + +comment on function swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information'; + +-- add tmp_content_mimetype entries to content_mimetype, overwriting +-- duplicates if conflict_update is true, skipping duplicates otherwise. +-- +-- If filtering duplicates is in order, the call to +-- swh_content_mimetype_missing must take place before calling this +-- function. +-- +-- +-- operates in bulk: 0. swh_mktemp(content_mimetype), 1. COPY to tmp_content_mimetype, +-- 2. call this function +create or replace function swh_content_mimetype_add(conflict_update boolean) + returns void + language plpgsql +as $$ +begin + if conflict_update then + insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) + select id, mimetype, encoding, indexer_configuration_id + from tmp_content_mimetype tcm + on conflict(id, indexer_configuration_id) + do update set mimetype = excluded.mimetype, + encoding = excluded.encoding; + + else + insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) + select id, mimetype, encoding, indexer_configuration_id + from tmp_content_mimetype tcm + on conflict(id, indexer_configuration_id) do nothing; + end if; + return; +end +$$; + +comment on function swh_content_mimetype_add(boolean) IS 'Add new content mimetypes'; + +create type content_mimetype_signature as( + id sha1, + mimetype bytea, + encoding bytea, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +-- Retrieve list of content mimetype from the temporary table. +-- +-- operates in bulk: 0. mktemp(tmp_bytea), 1. 
COPY to tmp_bytea, +-- 2. call this function +create or replace function swh_content_mimetype_get() + returns setof content_mimetype_signature + language plpgsql +as $$ +begin + return query + select c.id, mimetype, encoding, + i.id as tool_id, tool_name, tool_version, tool_configuration + from tmp_bytea t + inner join content_mimetype c on c.id=t.id + inner join indexer_configuration i on c.indexer_configuration_id=i.id; + return; +end +$$; + +comment on function swh_content_mimetype_get() IS 'List content''s mimetypes'; + +-- create a temporary table for content_language tmp_content_language, +create or replace function swh_mktemp_content_language_missing() + returns void + language sql +as $$ + create temporary table tmp_content_language_missing ( + id sha1, + indexer_configuration_id integer + ) on commit drop; +$$; + +comment on function swh_mktemp_content_language_missing() is 'Helper table to filter missing language'; + +-- check which entries of tmp_bytea are missing from content_language +-- +-- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea, +-- 2. call this function +create or replace function swh_content_language_missing() + returns setof sha1 + language plpgsql +as $$ +begin + return query + select id::sha1 from tmp_content_language_missing as tmp + where not exists + (select 1 from content_language as c + where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id); + return; +end +$$; + +comment on function swh_content_language_missing() IS 'Filter missing content languages'; + +-- add tmp_content_language entries to content_language, overwriting +-- duplicates if conflict_update is true, skipping duplicates otherwise. +-- +-- If filtering duplicates is in order, the call to +-- swh_content_language_missing must take place before calling this +-- function. +-- +-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to +-- tmp_content_language, 2. call this function +create or replace function swh_content_language_add(conflict_update boolean) + returns void + language plpgsql +as $$ +begin + if conflict_update then + insert into content_language (id, lang, indexer_configuration_id) + select id, lang, indexer_configuration_id + from tmp_content_language tcl + on conflict(id, indexer_configuration_id) + do update set lang = excluded.lang; + + else + insert into content_language (id, lang, indexer_configuration_id) + select id, lang, indexer_configuration_id + from tmp_content_language tcl + on conflict(id, indexer_configuration_id) + do nothing; + end if; + return; +end +$$; + +comment on function swh_content_language_add(boolean) IS 'Add new content languages'; + +-- create a temporary table for retrieving content_language +create or replace function swh_mktemp_content_language() + returns void + language sql +as $$ + create temporary table tmp_content_language ( + like content_language including defaults + ) on commit drop; +$$; + +comment on function swh_mktemp_content_language() is 'Helper table to add content language'; + +create type content_language_signature as ( + id sha1, + lang languages, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +-- Retrieve list of content language from the temporary table. +-- +-- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. 
call this function +create or replace function swh_content_language_get() + returns setof content_language_signature + language plpgsql +as $$ +begin + return query + select c.id, lang, i.id as tool_id, tool_name, tool_version, tool_configuration + from tmp_bytea t + inner join content_language c on c.id = t.id + inner join indexer_configuration i on i.id=c.indexer_configuration_id; + return; +end +$$; + +comment on function swh_content_language_get() is 'List content''s language'; + + +-- create a temporary table for content_ctags tmp_content_ctags, +create or replace function swh_mktemp_content_ctags() + returns void + language sql +as $$ + create temporary table tmp_content_ctags ( + like content_ctags including defaults + ) on commit drop; +$$; + +comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags'; + + +-- add tmp_content_ctags entries to content_ctags, overwriting +-- duplicates if conflict_update is true, skipping duplicates otherwise. +-- +-- operates in bulk: 0. swh_mktemp(content_ctags), 1. COPY to tmp_content_ctags, +-- 2. call this function +create or replace function swh_content_ctags_add(conflict_update boolean) + returns void + language plpgsql +as $$ +begin + if conflict_update then + delete from content_ctags + where id in (select tmp.id + from tmp_content_ctags tmp + inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); + end if; + + insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id) + select id, name, kind, line, lang, indexer_configuration_id + from tmp_content_ctags tct + on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id) + do nothing; + return; +end +$$; + +comment on function swh_content_ctags_add(boolean) IS 'Add new ctags symbols per content'; + +-- create a temporary table for content_ctags missing routine +create or replace function swh_mktemp_content_ctags_missing() + returns void + language sql +as $$ + create temporary table tmp_content_ctags_missing ( + id sha1, + indexer_configuration_id integer + ) on commit drop; +$$; + +comment on function swh_mktemp_content_ctags_missing() is 'Helper table to filter missing content ctags'; + +-- check which entries of tmp_bytea are missing from content_ctags +-- +-- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea, +-- 2. call this function +create or replace function swh_content_ctags_missing() + returns setof sha1 + language plpgsql +as $$ +begin + return query + (select id::sha1 from tmp_content_ctags_missing as tmp + where not exists + (select 1 from content_ctags as c + where c.id = tmp.id and c.indexer_configuration_id=tmp.indexer_configuration_id + limit 1)); + return; +end +$$; + +comment on function swh_content_ctags_missing() IS 'Filter missing content ctags'; + +create type content_ctags_signature as ( + id sha1, + name text, + kind text, + line bigint, + lang ctags_languages, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +-- Retrieve list of content ctags from the temporary table. +-- +-- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. 
call this function +create or replace function swh_content_ctags_get() + returns setof content_ctags_signature + language plpgsql +as $$ +begin + return query + select c.id, c.name, c.kind, c.line, c.lang, + i.id as tool_id, i.tool_name, i.tool_version, i.tool_configuration + from tmp_bytea t + inner join content_ctags c using(id) + inner join indexer_configuration i on i.id = c.indexer_configuration_id + order by line; + return; +end +$$; + +comment on function swh_content_ctags_get() IS 'List content ctags'; + +-- Search within ctags content. +-- +create or replace function swh_content_ctags_search( + expression text, + l integer default 10, + last_sha1 sha1 default '\x0000000000000000000000000000000000000000') + returns setof content_ctags_signature + language sql +as $$ + select c.id, name, kind, line, lang, + i.id as tool_id, tool_name, tool_version, tool_configuration + from content_ctags c + inner join indexer_configuration i on i.id = c.indexer_configuration_id + where hash_sha1(name) = hash_sha1(expression) + and c.id > last_sha1 + order by id + limit l; +$$; + +comment on function swh_content_ctags_search(text, integer, sha1) IS 'Equality search through ctags'' symbols'; + + +-- create a temporary table for content_fossology_license tmp_content_fossology_license, +create or replace function swh_mktemp_content_fossology_license() + returns void + language sql +as $$ + create temporary table tmp_content_fossology_license ( + id sha1, + license text, + indexer_configuration_id integer + ) on commit drop; +$$; + +comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license'; + +-- add tmp_content_fossology_license entries to content_fossology_license, overwriting +-- duplicates if conflict_update is true, skipping duplicates otherwise. +-- +-- operates in bulk: 0. swh_mktemp(content_fossology_license), 1. COPY to +-- tmp_content_fossology_license, 2. call this function +create or replace function swh_content_fossology_license_add(conflict_update boolean) + returns void + language plpgsql +as $$ +begin + -- insert unknown licenses first + insert into fossology_license (name) + select distinct license from tmp_content_fossology_license tmp + where not exists (select 1 from fossology_license where name=tmp.license) + on conflict(name) do nothing; + + if conflict_update then + -- delete from content_fossology_license c + -- using tmp_content_fossology_license tmp, indexer_configuration i + -- where c.id = tmp.id and i.id=tmp.indexer_configuration_id + delete from content_fossology_license + where id in (select tmp.id + from tmp_content_fossology_license tmp + inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); + end if; + + insert into content_fossology_license (id, license_id, indexer_configuration_id) + select tcl.id, + (select id from fossology_license where name = tcl.license) as license, + indexer_configuration_id + from tmp_content_fossology_license tcl + on conflict(id, license_id, indexer_configuration_id) + do nothing; + return; +end +$$; + +comment on function swh_content_fossology_license_add(boolean) IS 'Add new content licenses'; + +create type content_fossology_license_signature as ( + id sha1, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb, + licenses text[] +); + +-- Retrieve list of content license from the temporary table. +-- +-- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, +-- 2. 
call this function +create or replace function swh_content_fossology_license_get() + returns setof content_fossology_license_signature + language plpgsql +as $$ +begin + return query + select cl.id, + ic.id as tool_id, + ic.tool_name, + ic.tool_version, + ic.tool_configuration, + array(select name + from fossology_license + where id = ANY(array_agg(cl.license_id))) as licenses + from tmp_bytea tcl + inner join content_fossology_license cl using(id) + inner join indexer_configuration ic on ic.id=cl.indexer_configuration_id + group by cl.id, ic.id, ic.tool_name, ic.tool_version, ic.tool_configuration; + return; +end +$$; + +comment on function swh_content_fossology_license_get() IS 'List content licenses'; + +-- content_metadata functions +-- +-- create a temporary table for content_metadata tmp_content_metadata, +create or replace function swh_mktemp_content_metadata_missing() + returns void + language sql +as $$ + create temporary table tmp_content_metadata_missing ( + id sha1, + indexer_configuration_id integer + ) on commit drop; +$$; + +comment on function swh_mktemp_content_metadata_missing() is 'Helper table to filter missing metadata in content_metadata'; + +-- check which entries of tmp_bytea are missing from content_metadata +-- +-- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea, +-- 2. call this function +create or replace function swh_content_metadata_missing() + returns setof sha1 + language plpgsql +as $$ +begin + return query + select id::sha1 from tmp_content_metadata_missing as tmp + where not exists + (select 1 from content_metadata as c + where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id); + return; +end +$$; + +comment on function swh_content_metadata_missing() IS 'Filter missing content metadata'; + +-- add tmp_content_metadata entries to content_metadata, overwriting +-- duplicates if conflict_update is true, skipping duplicates otherwise. +-- +-- If filtering duplicates is in order, the call to +-- swh_content_metadata_missing must take place before calling this +-- function. +-- +-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to +-- tmp_content_metadata, 2. 
call this function +create or replace function swh_content_metadata_add(conflict_update boolean) + returns void + language plpgsql +as $$ +begin + if conflict_update then + insert into content_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_content_metadata tcm + on conflict(id, indexer_configuration_id) + do update set translated_metadata = excluded.translated_metadata; + + else + insert into content_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_content_metadata tcm + on conflict(id, indexer_configuration_id) + do nothing; + end if; + return; +end +$$; + +comment on function swh_content_metadata_add(boolean) IS 'Add new content metadata'; + +-- create a temporary table for retrieving content_metadata +create or replace function swh_mktemp_content_metadata() + returns void + language sql +as $$ + create temporary table tmp_content_metadata ( + like content_metadata including defaults + ) on commit drop; +$$; + +comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata'; + +-- +create type content_metadata_signature as ( + id sha1, + translated_metadata jsonb, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +-- Retrieve list of content metadata from the temporary table. +-- +-- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function +create or replace function swh_content_metadata_get() + returns setof content_metadata_signature + language plpgsql +as $$ +begin + return query + select c.id, translated_metadata, i.id as tool_id, tool_name, tool_version, tool_configuration + from tmp_bytea t + inner join content_metadata c on c.id = t.id + inner join indexer_configuration i on i.id=c.indexer_configuration_id; + return; +end +$$; + +comment on function swh_content_metadata_get() is 'List content''s metadata'; +-- end content_metadata functions + +-- revision_metadata functions +-- +-- create a temporary table for revision_metadata tmp_revision_metadata, +create or replace function swh_mktemp_revision_metadata_missing() + returns void + language sql +as $$ + create temporary table tmp_revision_metadata_missing ( + id sha1_git, + indexer_configuration_id integer + ) on commit drop; +$$; + +comment on function swh_mktemp_revision_metadata_missing() is 'Helper table to filter missing metadata in revision_metadata'; + +-- check which entries of tmp_bytea are missing from revision_metadata +-- +-- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea, +-- 2. call this function +create or replace function swh_revision_metadata_missing() + returns setof sha1 + language plpgsql +as $$ +begin + return query + select id::sha1 from tmp_revision_metadata_missing as tmp + where not exists + (select 1 from revision_metadata as c + where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id); + return; +end +$$; + +comment on function swh_revision_metadata_missing() IS 'Filter missing content metadata'; + +-- add tmp_revision_metadata entries to revision_metadata, overwriting +-- duplicates if conflict_update is true, skipping duplicates otherwise. +-- +-- If filtering duplicates is in order, the call to +-- swh_revision_metadata_missing must take place before calling this +-- function. +-- +-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to +-- tmp_revision_metadata, 2. 
call this function +create or replace function swh_revision_metadata_add(conflict_update boolean) + returns void + language plpgsql +as $$ +begin + if conflict_update then + insert into revision_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_revision_metadata tcm + on conflict(id, indexer_configuration_id) + do update set translated_metadata = excluded.translated_metadata; + + else + insert into revision_metadata (id, translated_metadata, indexer_configuration_id) + select id, translated_metadata, indexer_configuration_id + from tmp_revision_metadata tcm + on conflict(id, indexer_configuration_id) + do nothing; + end if; + return; +end +$$; + +comment on function swh_revision_metadata_add(boolean) IS 'Add new revision metadata'; + +-- create a temporary table for retrieving revision_metadata +create or replace function swh_mktemp_revision_metadata() + returns void + language sql +as $$ + create temporary table tmp_revision_metadata ( + like revision_metadata including defaults + ) on commit drop; +$$; + +comment on function swh_mktemp_revision_metadata() is 'Helper table to add revision metadata'; + +-- +create type revision_metadata_signature as ( + id sha1_git, + translated_metadata jsonb, + tool_id integer, + tool_name text, + tool_version text, + tool_configuration jsonb +); + +-- Retrieve list of revision metadata from the temporary table. +-- +-- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function +create or replace function swh_revision_metadata_get() + returns setof revision_metadata_signature + language plpgsql +as $$ +begin + return query + select c.id, translated_metadata, i.id as tool_id, tool_name, tool_version, tool_configuration + from tmp_bytea t + inner join revision_metadata c on c.id = t.id + inner join indexer_configuration i on i.id=c.indexer_configuration_id; + return; +end +$$; + +create or replace function swh_mktemp_indexer_configuration() + returns void + language sql +as $$ + create temporary table tmp_indexer_configuration ( + like indexer_configuration including defaults + ) on commit drop; + alter table tmp_indexer_configuration drop column id; +$$; + + +-- add tmp_indexer_configuration entries to indexer_configuration, +-- skipping duplicates if any. +-- +-- operates in bulk: 0. create temporary tmp_indexer_configuration, 1. COPY to +-- it, 2. 
call this function to insert and filtering out duplicates +create or replace function swh_indexer_configuration_add() + returns setof indexer_configuration + language plpgsql +as $$ +begin + insert into indexer_configuration(tool_name, tool_version, tool_configuration) + select tool_name, tool_version, tool_configuration from tmp_indexer_configuration tmp + on conflict(tool_name, tool_version, tool_configuration) do nothing; + + return query + select id, tool_name, tool_version, tool_configuration + from tmp_indexer_configuration join indexer_configuration + using(tool_name, tool_version, tool_configuration); + + return; +end +$$; diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql new file mode 100644 index 0000000..addb720 --- /dev/null +++ b/sql/swh-indexes.sql @@ -0,0 +1,57 @@ +-- fossology_license +create unique index fossology_license_pkey on fossology_license(id); +alter table fossology_license add primary key using index fossology_license_pkey; + +create unique index on fossology_license(name); + +-- indexer_configuration +create unique index concurrently indexer_configuration_pkey on indexer_configuration(id); +alter table indexer_configuration add primary key using index indexer_configuration_pkey; + +create unique index on indexer_configuration(tool_name, tool_version, tool_configuration); + +-- content_ctags +create index on content_ctags(id); +create index on content_ctags(hash_sha1(name)); +create unique index on content_ctags(id, hash_sha1(name), kind, line, lang, indexer_configuration_id); + +alter table content_ctags add constraint content_ctags_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; +alter table content_ctags validate constraint content_ctags_indexer_configuration_id_fkey; + +-- content_metadata +create unique index content_metadata_pkey on content_metadata(id, indexer_configuration_id); +alter table content_metadata add primary key using index content_metadata_pkey; + +alter table content_metadata add constraint content_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; +alter table content_metadata validate constraint content_metadata_indexer_configuration_id_fkey; + +-- revision_metadata +create unique index revision_metadata_pkey on revision_metadata(id, indexer_configuration_id); +alter table revision_metadata add primary key using index revision_metadata_pkey; + +alter table revision_metadata add constraint revision_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; +alter table revision_metadata validate constraint revision_metadata_indexer_configuration_id_fkey; + +-- content_mimetype +create unique index content_mimetype_pkey on content_mimetype(id, indexer_configuration_id); +alter table content_mimetype add primary key using index content_mimetype_pkey; + +alter table content_mimetype add constraint content_mimetype_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; +alter table content_mimetype validate constraint content_mimetype_indexer_configuration_id_fkey; + +-- content_language +create unique index content_language_pkey on content_language(id, indexer_configuration_id); +alter table content_language add primary key using index content_language_pkey; + +alter table content_language add constraint content_language_indexer_configuration_id_fkey foreign key 
(indexer_configuration_id) references indexer_configuration(id) not valid; +alter table content_language validate constraint content_language_indexer_configuration_id_fkey; + +-- content_fossology_license +create unique index content_fossology_license_pkey on content_fossology_license(id, license_id, indexer_configuration_id); +alter table content_fossology_license add primary key using index content_fossology_license_pkey; + +alter table content_fossology_license add constraint content_fossology_license_license_id_fkey foreign key (license_id) references fossology_license(id) not valid; +alter table content_fossology_license validate constraint content_fossology_license_license_id_fkey; + +alter table content_fossology_license add constraint content_fossology_license_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; +alter table content_fossology_license validate constraint content_fossology_license_indexer_configuration_id_fkey; diff --git a/sql/swh-init.sql b/sql/swh-init.sql new file mode 100644 index 0000000..e78ac3c --- /dev/null +++ b/sql/swh-init.sql @@ -0,0 +1,13 @@ +create extension if not exists btree_gist; +create extension if not exists pgcrypto; + +create or replace language plpgsql; +create or replace language plpython3u; + +create or replace function hash_sha1(text) +returns text +as $$ +select encode(digest($1, 'sha1'), 'hex') +$$ language sql strict immutable; + +comment on function hash_sha1(text) is 'Compute sha1 hash as text'; diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql new file mode 100644 index 0000000..b950793 --- /dev/null +++ b/sql/swh-schema.sql @@ -0,0 +1,138 @@ +--- +--- Software Heritage Indexers Data Model +--- + +-- drop schema if exists swh cascade; +-- create schema swh; +-- set search_path to swh; + +create table dbversion +( + version int primary key, + release timestamptz, + description text +); + +insert into dbversion(version, release, description) + values(114, now(), 'Work In Progress'); +-- Computing metadata on sha1's contents + +-- a SHA1 checksum (not necessarily originating from Git) +create domain sha1 as bytea check (length(value) = 20); + +-- a Git object ID, i.e., a SHA1 checksum +create domain sha1_git as bytea check (length(value) = 20); + +create table indexer_configuration ( + id serial not null, + tool_name text not null, + tool_version text not null, + tool_configuration jsonb +); + +comment on table indexer_configuration is 'Indexer''s configuration version'; +comment on column indexer_configuration.id is 'Tool identifier'; +comment on column indexer_configuration.tool_name is 'Tool name'; +comment on column indexer_configuration.tool_version is 'Tool version'; +comment on column indexer_configuration.tool_configuration is 'Tool configuration: command line, flags, etc...'; + +-- Properties (mimetype, encoding, etc...)
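For orientation, a sketch of how an indexer_configuration row and an entry of the content_mimetype table defined next look from the Python side; the tool values mirror sql/swh-data.sql, while the id, sha1, mimetype and encoding are invented:

# Tool dict as registered by the indexers (cf. register_tools/_prepare_tool in
# swh/indexer/indexer.py later in this diff); 'id' is the hypothetical
# indexer_configuration.id handed back by the database.
tool = {
    'id': 2,
    'tool_name': 'file',
    'tool_version': '5.22',
    'tool_configuration': {'command_line': 'file --mime '},
}

# Matching content_mimetype entry produced by the mimetype indexer.
content_mimetype_entry = {
    'id': bytes.fromhex('34973274ccef6ab4dfaaf86599792fa9c3fe4689'),
    'mimetype': b'text/plain',
    'encoding': b'us-ascii',
    'indexer_configuration_id': tool['id'],
}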
+create table content_mimetype ( + id sha1 not null, + mimetype bytea not null, + encoding bytea not null, + indexer_configuration_id bigint not null +); + +comment on table content_mimetype is 'Metadata associated to a raw content'; +comment on column content_mimetype.mimetype is 'Raw content Mimetype'; +comment on column content_mimetype.encoding is 'Raw content encoding'; +comment on column content_mimetype.indexer_configuration_id is 'Tool used to compute the information'; + +-- Language metadata +create table content_language ( + id sha1 not null, + lang languages not null, + indexer_configuration_id bigint not null +); + +comment on table content_language is 'Language information on a raw content'; +comment on column content_language.lang is 'Language information'; +comment on column content_language.indexer_configuration_id is 'Tool used to compute the information'; + +-- ctags information per content +create table content_ctags ( + id sha1 not null, + name text not null, + kind text not null, + line bigint not null, + lang ctags_languages not null, + indexer_configuration_id bigint not null +); + +comment on table content_ctags is 'Ctags information on a raw content'; +comment on column content_ctags.id is 'Content identifier'; +comment on column content_ctags.name is 'Symbol name'; +comment on column content_ctags.kind is 'Symbol kind (function, class, variable, const...)'; +comment on column content_ctags.line is 'Symbol line'; +comment on column content_ctags.lang is 'Language information for that content'; +comment on column content_ctags.indexer_configuration_id is 'Tool used to compute the information'; + +create table fossology_license( + id smallserial, + name text not null +); + +comment on table fossology_license is 'Possible license recognized by license indexer'; +comment on column fossology_license.id is 'License identifier'; +comment on column fossology_license.name is 'License name'; + +create table content_fossology_license ( + id sha1 not null, + license_id smallserial not null, + indexer_configuration_id bigint not null +); + +comment on table content_fossology_license is 'license associated to a raw content'; +comment on column content_fossology_license.id is 'Raw content identifier'; +comment on column content_fossology_license.license_id is 'One of the content''s license identifier'; +comment on column content_fossology_license.indexer_configuration_id is 'Tool used to compute the information'; + + +-- The table content_metadata provides a translation to files +-- identified as potentially containning metadata with a translation tool (indexer_configuration_id) +create table content_metadata( + id sha1 not null, + translated_metadata jsonb not null, + indexer_configuration_id bigint not null +); + +comment on table content_metadata is 'metadata semantically translated from a content file'; +comment on column content_metadata.id is 'sha1 of content file'; +comment on column content_metadata.translated_metadata is 'result of translation with defined format'; +comment on column content_metadata.indexer_configuration_id is 'tool used for translation'; + +-- The table revision_metadata provides a minimal set of intrinsic metadata +-- detected with the detection tool (indexer_configuration_id) and aggregated +-- from the content_metadata translation. 
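Before the revision_metadata DDL itself, a sketch of what its translated_metadata column holds, using the property names declared in sql/json/revision_metadata.translated_metadata.json earlier in this diff; every property is list-valued there, and the values below are made up:

translated_metadata = {
    'name': ['example-package'],
    'version': ['0.0.1'],
    'description': ['An illustrative package description'],
    'author': ['Jane Doe'],
    'license': ['MIT'],
    'codeRepository': ['https://example.org/example-package.git'],
    'developmentStatus': ['stable'],
}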
+create table revision_metadata( + id sha1_git not null, + translated_metadata jsonb not null, + indexer_configuration_id bigint not null +); + +comment on table revision_metadata is 'metadata semantically detected and translated in a revision'; +comment on column revision_metadata.id is 'sha1_git of revision'; +comment on column revision_metadata.translated_metadata is 'result of detection and translation with defined format'; +comment on column revision_metadata.indexer_configuration_id is 'tool used for detection'; + +create table origin_metadata_translation( + id bigserial not null, -- PK origin_metadata identifier + result jsonb, + tool_id bigint +); + +comment on table origin_metadata_translation is 'keeps translated for an origin_metadata entry'; +comment on column origin_metadata_translation.id is 'the entry id in origin_metadata'; +comment on column origin_metadata_translation.result is 'translated_metadata result after translation with tool'; +comment on column origin_metadata_translation.tool_id is 'tool used for translation'; diff --git a/sql/upgrades/114.sql b/sql/upgrades/114.sql new file mode 100644 index 0000000..7699a6d --- /dev/null +++ b/sql/upgrades/114.sql @@ -0,0 +1,8 @@ +create sequence origin_metadata_translation_id_seq + start with 1 + increment by 1 + no maxvalue + no minvalue + cache 1; + +select setval('fossology_license_id_seq', 833, true); diff --git a/swh.indexer.egg-info/PKG-INFO b/swh.indexer.egg-info/PKG-INFO index 99942cf..2481b88 100644 --- a/swh.indexer.egg-info/PKG-INFO +++ b/swh.indexer.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.indexer -Version: 0.0.43 +Version: 0.0.44 Summary: Software Heritage Content Indexer Home-page: https://forge.softwareheritage.org/diffusion/78/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.indexer.egg-info/SOURCES.txt b/swh.indexer.egg-info/SOURCES.txt index 81e40f6..e096619 100644 --- a/swh.indexer.egg-info/SOURCES.txt +++ b/swh.indexer.egg-info/SOURCES.txt @@ -1,46 +1,72 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile README codemeta.json requirements-swh.txt requirements.txt setup.py version.txt debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format docs/.gitignore docs/Makefile docs/conf.py docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder +sql/Makefile +sql/swh-data.sql +sql/swh-enums.sql +sql/swh-func.sql +sql/swh-indexes.sql +sql/swh-init.sql +sql/swh-schema.sql +sql/bin/db-upgrade +sql/bin/dot_add_content +sql/doc/json +sql/json/.gitignore +sql/json/Makefile +sql/json/indexer_configuration.tool_configuration.schema.json +sql/json/revision_metadata.translated_metadata.json +sql/upgrades/114.sql swh/__init__.py swh.indexer.egg-info/PKG-INFO swh.indexer.egg-info/SOURCES.txt swh.indexer.egg-info/dependency_links.txt swh.indexer.egg-info/requires.txt swh.indexer.egg-info/top_level.txt swh/indexer/__init__.py swh/indexer/ctags.py swh/indexer/fossology_license.py swh/indexer/indexer.py swh/indexer/language.py swh/indexer/metadata.py swh/indexer/metadata_detector.py swh/indexer/metadata_dictionary.py swh/indexer/mimetype.py swh/indexer/orchestrator.py swh/indexer/producer.py swh/indexer/rehash.py swh/indexer/tasks.py +swh/indexer/storage/__init__.py +swh/indexer/storage/converters.py +swh/indexer/storage/db.py +swh/indexer/storage/api/__init__.py +swh/indexer/storage/api/client.py +swh/indexer/storage/api/server.py 
+swh/indexer/tests/__init__.py swh/indexer/tests/test_language.py swh/indexer/tests/test_metadata.py swh/indexer/tests/test_mimetype.py -swh/indexer/tests/test_utils.py \ No newline at end of file +swh/indexer/tests/test_utils.py +swh/indexer/tests/storage/__init__.py +swh/indexer/tests/storage/test_api_client.py +swh/indexer/tests/storage/test_converters.py +swh/indexer/tests/storage/test_storage.py \ No newline at end of file diff --git a/swh/indexer/__init__.py b/swh/indexer/__init__.py index a5f3dfd..b558a81 100644 --- a/swh/indexer/__init__.py +++ b/swh/indexer/__init__.py @@ -1,29 +1,55 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information INDEXER_CLASSES = { 'mimetype': 'swh.indexer.mimetype.ContentMimetypeIndexer', 'language': 'swh.indexer.language.ContentLanguageIndexer', 'ctags': 'swh.indexer.ctags.CtagsIndexer', 'fossology_license': 'swh.indexer.fossology_license.ContentFossologyLicenseIndexer', } TASK_NAMES = { 'orchestrator_all': 'swh.indexer.tasks.SWHOrchestratorAllContentsTask', 'orchestrator_text': 'swh.indexer.tasks.SWHOrchestratorTextContentsTask', 'mimetype': 'swh.indexer.tasks.SWHContentMimetypeTask', 'language': 'swh.indexer.tasks.SWHContentLanguageTask', 'ctags': 'swh.indexer.tasks.SWHCtagsTask', 'fossology_license': 'swh.indexer.tasks.SWHContentFossologyLicenseTask', 'rehash': 'swh.indexer.tasks.SWHRecomputeChecksumsTask', } __all__ = [ 'INDEXER_CLASSES', 'TASK_NAMES', ] + + +def get_indexer_storage(cls, args): + """Get an indexer storage object of class `storage_class` with + arguments `storage_args`. + + Args: + storage (dict): dictionary with keys: + - cls (str): storage's class, either 'local' or 'remote' + - args (dict): dictionary with keys + + Returns: + an instance of swh.indexer's storage (either local or remote) + + Raises: + ValueError if passed an unknown storage class. + + """ + if cls == 'remote': + from .storage.api.client import RemoteStorage as IndexerStorage + elif cls == 'local': + from .storage import IndexerStorage + else: + raise ValueError('Unknown indexer storage class `%s`' % cls) + + return IndexerStorage(**args) diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py index ec395f2..dde3740 100644 --- a/swh/indexer/ctags.py +++ b/swh/indexer/ctags.py @@ -1,161 +1,161 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import subprocess import json from swh.model import hashutil from .language import compute_language from .indexer import ContentIndexer, DiskIndexer # Options used to compute tags __FLAGS = [ '--fields=+lnz', # +l: language # +n: line number of tag definition # +z: include the symbol's kind (function, variable, ...) '--sort=no', # sort output on tag name '--links=no', # do not follow symlinks '--output-format=json', # outputs in json ] def run_ctags(path, lang=None, ctags_command='ctags'): """Run ctags on file path with optional language. 
Args: path: path to the file lang: language for that path (optional) Returns: ctags' output """ optional = [] if lang: optional = ['--language-force=%s' % lang] cmd = [ctags_command] + __FLAGS + optional + [path] output = subprocess.check_output(cmd, universal_newlines=True) for symbol in output.split('\n'): if not symbol: continue js_symbol = json.loads(symbol) yield { 'name': js_symbol['name'], 'kind': js_symbol['kind'], 'line': js_symbol['line'], 'lang': js_symbol['language'], } class CtagsIndexer(ContentIndexer, DiskIndexer): CONFIG_BASE_FILENAME = 'indexer/ctags' ADDITIONAL_CONFIG = { 'workdir': ('str', '/tmp/swh/indexer.ctags'), 'tools': ('dict', { 'name': 'universal-ctags', 'version': '~git7859817b', 'configuration': { 'command_line': '''ctags --fields=+lnz --sort=no --links=no ''' '''--output-format=json ''' }, }), 'languages': ('dict', { 'ada': 'Ada', 'adl': None, 'agda': None, # ... }) } def prepare(self): super().prepare() self.working_directory = self.config['workdir'] self.language_map = self.config['languages'] self.tool = self.tools[0] def filter(self, ids): """Filter out known sha1s and return only missing ones. """ - yield from self.storage.content_ctags_missing(( + yield from self.idx_storage.content_ctags_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: A dict, representing a content_mimetype, with keys: - id (bytes): content's identifier (sha1) - ctags ([dict]): ctags list of symbols """ lang = compute_language(data, log=self.log)['lang'] if not lang: return None ctags_lang = self.language_map.get(lang) if not ctags_lang: return None ctags = { 'id': id, } filename = hashutil.hash_to_hex(id) content_path = self.write_to_temp( filename=filename, data=data) result = run_ctags(content_path, lang=ctags_lang) ctags.update({ 'ctags': list(result), 'indexer_configuration_id': self.tool['id'], }) self.cleanup(content_path) return ctags def persist_index_computations(self, results, policy_update): """Persist the results in storage. Args: results ([dict]): list of content_mimetype, dict with the following keys: - id (bytes): content's identifier (sha1) - ctags ([dict]): ctags list of symbols policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ - self.storage.content_ctags_add( + self.idx_storage.content_ctags_add( results, conflict_update=(policy_update == 'update-dups')) @click.command() @click.option('--path', help="Path to execute index on") def main(path): r = list(run_ctags(path)) print(r) if __name__ == '__main__': main() diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py index d1f9db6..3d46407 100644 --- a/swh/indexer/fossology_license.py +++ b/swh/indexer/fossology_license.py @@ -1,141 +1,141 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import subprocess from swh.model import hashutil from .indexer import ContentIndexer, DiskIndexer def compute_license(path, log=None): """Determine license from file at path. 
Args: path: filepath to determine the license Returns: A dict with the following keys: - licenses ([str]): associated detected licenses to path - path (bytes): content filepath - tool (str): tool used to compute the output """ try: properties = subprocess.check_output(['nomossa', path], universal_newlines=True) if properties: res = properties.rstrip().split(' contains license(s) ') licenses = res[1].split(',') return { 'licenses': licenses, 'path': path, } except subprocess.CalledProcessError: if log: from os import path as __path log.exception('Problem during license detection for sha1 %s' % __path.basename(path)) return { 'licenses': [], 'path': path, } class ContentFossologyLicenseIndexer(ContentIndexer, DiskIndexer): """Indexer in charge of: - filtering out content already indexed - reading content from objstorage per the content's id (sha1) - computing {license, encoding} from that content - store result in storage """ ADDITIONAL_CONFIG = { 'workdir': ('str', '/tmp/swh/indexer.fossology.license'), 'tools': ('dict', { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', 'configuration': { 'command_line': 'nomossa ', }, }), } CONFIG_BASE_FILENAME = 'indexer/fossology_license' def prepare(self): super().prepare() self.working_directory = self.config['workdir'] self.tool = self.tools[0] def filter(self, ids): """Filter out known sha1s and return only missing ones. """ - yield from self.storage.content_fossology_license_missing(( + yield from self.idx_storage.content_fossology_license_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: sha1 (bytes): content's identifier raw_content (bytes): raw content in bytes Returns: A dict, representing a content_license, with keys: - id (bytes): content's identifier (sha1) - license (bytes): license in bytes - path (bytes): path """ filename = hashutil.hash_to_hex(id) content_path = self.write_to_temp( filename=filename, data=data) try: properties = compute_license(path=content_path, log=self.log) properties.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], }) finally: self.cleanup(content_path) return properties def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
Args: results ([dict]): list of content_license, dict with the following keys: - id (bytes): content's identifier (sha1) - license (bytes): license in bytes - path (bytes): path policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ - self.storage.content_fossology_license_add( + self.idx_storage.content_fossology_license_add( results, conflict_update=(policy_update == 'update-dups')) @click.command(help='Compute license for path using tool') @click.option('--tool', default='nomossa', help="Path to tool") @click.option('--path', required=1, help="Path to execute index on") def main(tool, path): print(compute_license(tool, path)) if __name__ == '__main__': main() diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py index 2cd850d..07cd85c 100644 --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -1,419 +1,418 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import abc import os import logging import shutil import tempfile from swh.core.config import SWHConfig from swh.objstorage import get_objstorage from swh.objstorage.exc import ObjNotFoundError from swh.model import hashutil -from swh.storage import get_storage from swh.scheduler.utils import get_task +from swh.indexer import get_indexer_storage class DiskIndexer: """Mixin intended to be used with other SomethingIndexer classes. Indexers inheriting from this class are a category of indexers which needs the disk for their computations. Note: This expects `self.working_directory` variable defined at runtime. """ def write_to_temp(self, filename, data): """Write the sha1's content in a temporary file. Args: sha1 (str): the sha1 name filename (str): one of sha1's many filenames data (bytes): the sha1's content to write in temporary file Returns: The path to the temporary file created. That file is filled in with the raw content's data. """ os.makedirs(self.working_directory, exist_ok=True) temp_dir = tempfile.mkdtemp(dir=self.working_directory) content_path = os.path.join(temp_dir, filename) with open(content_path, 'wb') as f: f.write(data) return content_path def cleanup(self, content_path): """Remove content_path from working directory. Args: content_path (str): the file to remove """ temp_dir = os.path.dirname(content_path) shutil.rmtree(temp_dir) class BaseIndexer(SWHConfig, metaclass=abc.ABCMeta): """Base class for indexers to inherit from. The main entry point is the :func:`run` function which is in charge of triggering the computations on the batch dict/ids received. Indexers can: - filter out ids whose data has already been indexed. - retrieve ids data from storage or objstorage - index this data depending on the object and store the result in storage. To implement a new object type indexer, inherit from the BaseIndexer and implement the process of indexation: :func:`run`: object_ids are different depending on object. For example: sha1 for content, sha1_git for revision, directory, release, and id for origin To implement a new concrete indexer, inherit from the object level classes: :class:`ContentIndexer`, :class:`RevisionIndexer` (later on :class:`OriginIndexer` will also be available) Then you need to implement the following functions: :func:`filter`: filter out data already indexed (in storage). 
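[Editor's aside on the DiskIndexer mixin above: disk-based indexers write each content to a throwaway temp directory, run an external tool on it, then remove the directory. A standalone restatement of that lifecycle, with a hypothetical working directory.]

import os
import shutil
import tempfile

class DiskHelper:
    """Minimal standalone version of the DiskIndexer temp-file lifecycle."""
    def __init__(self, working_directory):
        self.working_directory = working_directory

    def write_to_temp(self, filename, data):
        # One fresh temp dir per content, under the configured workdir.
        os.makedirs(self.working_directory, exist_ok=True)
        temp_dir = tempfile.mkdtemp(dir=self.working_directory)
        content_path = os.path.join(temp_dir, filename)
        with open(content_path, 'wb') as f:
            f.write(data)
        return content_path

    def cleanup(self, content_path):
        # Remove the whole temp dir, not just the file.
        shutil.rmtree(os.path.dirname(content_path))

helper = DiskHelper('/tmp/swh/indexer.example')   # hypothetical workdir
path = helper.write_to_temp('deadbeef', b'int main() { return 0; }\n')
print('wrote', path)
helper.cleanup(path)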
This function is used by the orchestrator and not directly by the indexer (cf. swh.indexer.orchestrator.BaseOrchestratorIndexer). :func:`index_object`: compute index on id with data (retrieved from the storage or the objstorage by the id key) and return the resulting index computation. :func:`persist_index_computations`: persist the results of multiple index computations in the storage. The new indexer implementation can also override the following functions: :func:`prepare`: Configuration preparation for the indexer. When overriding, this must call the `super().prepare()` instruction. :func:`check`: Configuration check for the indexer. When overriding, this must call the `super().check()` instruction. :func:`register_tools`: This should return a dict of the tool(s) to use when indexing or filtering. """ CONFIG = 'indexer/base' DEFAULT_CONFIG = { - 'storage': ('dict', { - 'host': 'uffizi', + 'indexer_storage': ('dict', { 'cls': 'remote', - 'args': {'root': '/tmp/softwareheritage/objects', - 'slicing': '0:2/2:4/4:6'} + 'args': { + 'db': 'service=swh-indexer-dev' + } }), + # queue to reschedule if problem (none for no rescheduling, # the default) 'rescheduling_task': ('str', None), 'objstorage': ('dict', { 'cls': 'multiplexer', 'args': { 'objstorages': [{ 'cls': 'filtered', 'args': { 'storage_conf': { 'cls': 'azure-storage', 'args': { 'account_name': '0euwestswh', 'api_secret_key': 'secret', 'container_name': 'contents' } }, 'filters_conf': [ {'type': 'readonly'}, {'type': 'prefix', 'prefix': '0'} ] } }, { 'cls': 'filtered', 'args': { 'storage_conf': { 'cls': 'azure-storage', 'args': { 'account_name': '1euwestswh', 'api_secret_key': 'secret', 'container_name': 'contents' } }, 'filters_conf': [ {'type': 'readonly'}, {'type': 'prefix', 'prefix': '1'} ] } }] }, }), } ADDITIONAL_CONFIG = {} def __init__(self): """Prepare and check that the indexer is ready to run. """ super().__init__() self.prepare() self.check() def prepare(self): """Prepare the indexer's needed runtime configuration. Without this step, the indexer cannot possibly run. """ self.config = self.parse_config_file( additional_configs=[self.ADDITIONAL_CONFIG]) objstorage = self.config['objstorage'] self.objstorage = get_objstorage(objstorage['cls'], objstorage['args']) - storage = self.config['storage'] - self.storage = get_storage(storage['cls'], storage['args']) + idx_storage = self.config['indexer_storage'] + self.idx_storage = get_indexer_storage(**idx_storage) rescheduling_task = self.config['rescheduling_task'] if rescheduling_task: self.rescheduling_task = get_task(rescheduling_task) else: self.rescheduling_task = None l = logging.getLogger('requests.packages.urllib3.connectionpool') l.setLevel(logging.WARN) self.log = logging.getLogger('swh.indexer') self.tools = list(self.register_tools(self.config['tools'])) def check(self): """Check the indexer's configuration is ok before proceeding. If ok, does nothing. If not raise error. """ if not self.tools: raise ValueError('Tools %s is unknown, cannot continue' % self.tools) def _prepare_tool(self, tool): """Prepare the tool dict to be compliant with the storage api. """ return {'tool_%s' % key: value for key, value in tool.items()} def register_tools(self, tools): """Permit to register tools to the storage. Add a sensible default which can be overridden if not sufficient. (For now, all indexers use only one tool) Expects the self.config['tools'] property to be set with one or more tools. Args: tools (dict/[dict]): Either a dict or a list of dict. 
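[Editor's aside on the configuration change above: indexers now read an 'indexer_storage' entry instead of 'storage' and expand it into get_indexer_storage(). A hedged sketch of the two shapes that appear in this diff, for reference only.]

# Remote indexer storage, as in BaseIndexer.DEFAULT_CONFIG above:
remote_indexer_storage = {
    'cls': 'remote',
    'args': {'db': 'service=swh-indexer-dev'},
}

# Local indexer storage, as in the API server's DEFAULT_CONFIG later in this diff:
local_indexer_storage = {
    'cls': 'local',
    'args': {'db': 'dbname=softwareheritage-indexer-dev'},
}

# Either shape is expanded by prepare() into get_indexer_storage(cls=..., args=...).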
Returns: List of dict with additional id key. Raises: ValueError if not a list nor a dict. """ tools = self.config['tools'] if isinstance(tools, list): tools = map(self._prepare_tool, tools) elif isinstance(tools, dict): tools = [self._prepare_tool(tools)] else: raise ValueError('Configuration tool(s) must be a dict or list!') - registered_tools = self.storage.indexer_configuration_add(tools) - return registered_tools + return self.idx_storage.indexer_configuration_add(tools) @abc.abstractmethod def filter(self, ids): """Filter missing ids for that particular indexer. Args: ids ([bytes]): list of ids Yields: iterator of missing ids """ pass @abc.abstractmethod def index(self, id, data): """Index computation for the id and associated raw data. Args: id (bytes): identifier data (bytes): id's data from storage or objstorage depending on object type Returns: a dict that makes sense for the persist_index_computations function. """ pass @abc.abstractmethod def persist_index_computations(self, results, policy_update): """Persist the computation resulting from the index. Args: results ([result]): List of results. One result is the result of the index function. policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them Returns: None """ pass def next_step(self, results): """Do something else with computations results (e.g. send to another queue, ...). (This is not an abstractmethod since it is optional). Args: results ([result]): List of results (dict) as returned by index function. Returns: None """ pass @abc.abstractmethod def run(self, ids, policy_update): """Given a list of ids: - retrieves the data from the storage - executes the indexing computations - stores the results (according to policy_update) Args: ids ([bytes]): id's identifier list policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ pass class ContentIndexer(BaseIndexer): """An object type indexer, inherits from the :class:`BaseIndexer` and implements the process of indexation for Contents using the run method Note: the :class:`ContentIndexer` is not an instantiable object. To use it in another context, one should inherit from this class and override the methods mentioned in the :class:`BaseIndexer` class. """ def run(self, ids, policy_update): """Given a list of ids: - retrieve the content from the storage - execute the indexing computations - store the results (according to policy_update) Args: ids ([bytes]): sha1's identifier list policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ results = [] try: for sha1 in ids: try: raw_content = self.objstorage.get(sha1) except ObjNotFoundError: self.log.warn('Content %s not found in objstorage' % hashutil.hash_to_hex(sha1)) continue res = self.index(sha1, raw_content) if res: # If no results, skip it results.append(res) self.persist_index_computations(results, policy_update) self.next_step(results) except Exception: self.log.exception( 'Problem when reading contents metadata.') if self.rescheduling_task: self.log.warn('Rescheduling batch') self.rescheduling_task.delay(ids, policy_update) class RevisionIndexer(BaseIndexer): """An object type indexer, inherits from the :class:`BaseIndexer` and implements the process of indexation for Revisions using the run method Note: the :class:`RevisionIndexer` is not an instantiable object. 
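[Editor's aside on the indexer contract: to make filter / index / persist_index_computations concrete, here is an illustrative skeleton. It deliberately stubs the storage calls so it can run standalone; a real indexer would inherit ContentIndexer and go through idx_storage instead.]

class ExampleContentIndexer:
    """Illustrative skeleton of the ContentIndexer contract (not the real API)."""

    def __init__(self, tool_id=1):
        self.tool = {'id': tool_id}   # hypothetical tool id
        self.known = set()            # stands in for idx_storage.*_missing()

    def filter(self, ids):
        # Yield only the ids this tool has not indexed yet.
        for sha1 in ids:
            if (sha1, self.tool['id']) not in self.known:
                yield sha1

    def index(self, id, data):
        # Compute something from the raw content; here, just its size.
        return {'id': id, 'length': len(data),
                'indexer_configuration_id': self.tool['id']}

    def persist_index_computations(self, results, policy_update):
        # A real indexer calls idx_storage.<table>_add(results, conflict_update=...).
        for res in results:
            self.known.add((res['id'], res['indexer_configuration_id']))

indexer = ExampleContentIndexer()
batch = {b'\x01' * 20: b'hello world', b'\x02' * 20: b'#!/bin/sh\n'}
to_index = list(indexer.filter(batch))
results = [indexer.index(sha1, batch[sha1]) for sha1 in to_index]
indexer.persist_index_computations(results, 'ignore-dups')
print(len(results), 'indexed;', len(list(indexer.filter(batch))), 'still missing')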
To use it in another context one should inherit from this class and override the methods mentioned in the :class:`BaseIndexer` class. """ - def run(self, ids, policy_update): """Given a list of sha1_gits: - retrieve revisions from storage - execute the indexing computations - store the results (according to policy_update) Args: ids ([bytes]): sha1_git's identifier list policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ results = [] revs = self.storage.revision_get(ids) for rev in revs: if not rev: self.log.warn('Revisions %s not found in storage' % list(map(hashutil.hash_to_hex, ids))) continue try: res = self.index(rev) if res: # If no results, skip it results.append(res) except Exception: self.log.exception( 'Problem when processing revision') self.persist_index_computations(results, policy_update) diff --git a/swh/indexer/language.py b/swh/indexer/language.py index 78d6b62..6433977 100644 --- a/swh/indexer/language.py +++ b/swh/indexer/language.py @@ -1,208 +1,208 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import io from pygments.lexers import guess_lexer from pygments.util import ClassNotFound from chardet.universaldetector import UniversalDetector from .indexer import ContentIndexer def _cleanup_classname(classname): """Determine the language from the pygments' lexer names. """ return classname.lower().replace(' ', '-') def _read_raw(raw_content, size=2048): """Read raw content in chunk. """ bs = io.BytesIO(raw_content) while True: chunk = bs.read(size) if not chunk: break yield chunk def _detect_encoding(raw_content): """Given a raw content, try and detect its encoding. """ detector = UniversalDetector() for chunk in _read_raw(raw_content): detector.feed(chunk) if detector.done: break detector.close() return detector.result['encoding'] def compute_language_from_chunk(encoding, length, raw_content, max_size, log=None): """Determine the raw content's language. Args: encoding (str): Encoding to use to decode the content length (int): raw_content's length raw_content (bytes): raw content to work with max_size (int): max size to split the raw content at Returns: Dict with keys: - lang: None if nothing found or the possible language """ try: if max_size <= length: raw_content = raw_content[0:max_size] content = raw_content.decode(encoding) lang = _cleanup_classname( guess_lexer(content).name) except ClassNotFound: lang = None except UnicodeDecodeError: raise except Exception: if log: log.exception('Problem during language detection, skipping') lang = None return { 'lang': lang } def compute_language(raw_content, encoding=None, log=None): """Determine the raw content's language. 
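[Editor's aside on language.py below: chardet guesses the encoding, pygments guesses the lexer, and the lexer name (lower-cased, spaces replaced by dashes) becomes the stored language. A minimal standalone demonstration of that pipeline; it assumes the chardet and pygments packages are installed.]

from chardet.universaldetector import UniversalDetector
from pygments.lexers import guess_lexer
from pygments.util import ClassNotFound

raw_content = b'def add(a, b):\n    return a + b\n'

detector = UniversalDetector()
detector.feed(raw_content)
detector.close()
encoding = detector.result['encoding'] or 'utf-8'

try:
    lang = guess_lexer(raw_content.decode(encoding)).name.lower().replace(' ', '-')
except ClassNotFound:
    lang = None

print({'lang': lang})   # most likely {'lang': 'python'}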
Args: raw_content (bytes): raw content to work with Returns: Dict with keys: - lang: None if nothing found or the possible language """ try: encoding = _detect_encoding(raw_content) content = raw_content.decode(encoding) lang = _cleanup_classname( guess_lexer(content).name) except ClassNotFound: lang = None except Exception: if log: log.exception('Problem during language detection, skipping') lang = None return { 'lang': lang } class ContentLanguageIndexer(ContentIndexer): """Indexer in charge of: - filtering out content already indexed - reading content from objstorage per the content's id (sha1) - computing {mimetype, encoding} from that content - store result in storage """ CONFIG_BASE_FILENAME = 'indexer/language' ADDITIONAL_CONFIG = { 'tools': ('dict', { 'name': 'pygments', 'version': '2.0.1+dfsg-1.1+deb8u1', 'configuration': { 'type': 'library', 'debian-package': 'python3-pygments', 'max_content_size': 10240, }, }), } def prepare(self): super().prepare() c = self.config self.max_content_size = c['tools']['configuration']['max_content_size'] self.tool = self.tools[0] def filter(self, ids): """Filter out known sha1s and return only missing ones. """ - yield from self.storage.content_language_missing(( + yield from self.idx_storage.content_language_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'] } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: A dict, representing a content_mimetype, with keys: - id (bytes): content's identifier (sha1) - lang (bytes): detected language """ result = { 'id': id, 'indexer_configuration_id': self.tool['id'], 'lang': None, } encoding = _detect_encoding(data) if not encoding: return result l = len(data) for i in range(0, 9): max_size = self.max_content_size + i try: result = compute_language_from_chunk( encoding, l, data, max_size, log=self.log) except UnicodeDecodeError: self.log.warn('Decoding failed on wrong byte chunk at [0-%s]' ', trying again at next ending byte.' % max_size) continue # we found something, so we return it result.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], }) break return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
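[Editor's aside on the retry loop in ContentLanguageIndexer.index(): cutting a multi-byte encoding at an arbitrary byte offset can split a character, so the indexer grows the window one byte at a time and retries. A standalone sketch of why that works.]

# Truncating UTF-8 in the middle of a multi-byte character raises
# UnicodeDecodeError; nudging the cut point forward, as index() does,
# eventually yields a cleanly decodable prefix.
data = 'e acute: é'.encode('utf-8')       # the last character is two bytes in UTF-8
max_size = len(data) - 1                   # cut in the middle of 'é'

for extra in range(0, 9):
    try:
        prefix = data[:max_size + extra].decode('utf-8')
        print('decoded with extra =', extra, '->', repr(prefix))
        break
    except UnicodeDecodeError:
        print('split a character at extra =', extra, ', retrying')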
Args: results ([dict]): list of content_mimetype, dict with the following keys: - id (bytes): content's identifier (sha1) - lang (bytes): detected language policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ - self.storage.content_language_add( + self.idx_storage.content_language_add( results, conflict_update=(policy_update == 'update-dups')) diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py index f40c0e4..9bded05 100644 --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -1,293 +1,299 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import logging from swh.indexer.indexer import ContentIndexer, RevisionIndexer from swh.indexer.metadata_dictionary import compute_metadata from swh.indexer.metadata_detector import detect_metadata from swh.indexer.metadata_detector import extract_minimal_metadata_dict from swh.model import hashutil class ContentMetadataIndexer(ContentIndexer): """Content-level indexer This indexer is in charge of: - filtering out content already indexed in content_metadata - reading content from objstorage with the content's id sha1 - computing translated_metadata by given context - using the metadata_dictionary as the 'swh-metadata-translator' tool - store result in content_metadata table """ CONFIG_BASE_FILENAME = 'indexer/metadata' def __init__(self, tool, config): # twisted way to use the exact same config of RevisionMetadataIndexer # object that uses internally ContentMetadataIndexer self.config = config self.config['tools'] = tool super().__init__() def prepare(self): self.results = [] - if self.config['storage']: - self.storage = self.config['storage'] + if self.config['indexer_storage']: + self.idx_storage = self.config['indexer_storage'] if self.config['objstorage']: self.objstorage = self.config['objstorage'] l = logging.getLogger('requests.packages.urllib3.connectionpool') l.setLevel(logging.WARN) self.log = logging.getLogger('swh.indexer') self.tools = self.register_tools(self.config['tools']) # NOTE: only one tool so far, change when no longer true self.tool = self.tools[0] def filter(self, ids): """Filter out known sha1s and return only missing ones. """ - yield from self.storage.content_metadata_missing(( + yield from self.idx_storage.content_metadata_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: dict: dictionary representing a content_metadata. If the translation wasn't successful the translated_metadata keys will be returned as None """ result = { 'id': id, 'indexer_configuration_id': self.tool['id'], 'translated_metadata': None } try: context = self.tool['tool_configuration']['context'] result['translated_metadata'] = compute_metadata(context, data) # a twisted way to keep result with indexer object for get_results self.results.append(result) except: self.log.exception( "Problem during tool retrieval of metadata translation") return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
Args: results ([dict]): list of content_metadata, dict with the following keys: - id (bytes): content's identifier (sha1) - translated_metadata (jsonb): detected metadata policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ - self.storage.content_metadata_add( + self.idx_storage.content_metadata_add( results, conflict_update=(policy_update == 'update-dups')) def get_results(self): """can be called only if run method was called before Returns: list: list of content_metadata entries calculated by current indexer """ return self.results class RevisionMetadataIndexer(RevisionIndexer): """Revision-level indexer This indexer is in charge of: - filtering revisions already indexed in revision_metadata table with defined computation tool - retrieve all entry_files in root directory - use metadata_detector for file_names containig metadata - compute metadata translation if necessary and possible (depends on tool) - send sha1s to content indexing if possible - store the results for revision """ CONFIG_BASE_FILENAME = 'indexer/metadata' ADDITIONAL_CONFIG = { + 'storage': ('dict', { + 'cls': 'remote', + 'args': { + 'url': 'http://localhost:5002/', + } + }), 'tools': ('dict', { 'name': 'swh-metadata-detector', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': ['npm', 'codemeta'] }, }), } def prepare(self): super().prepare() self.tool = self.tools[0] def filter(self, sha1_gits): """Filter out known sha1s and return only missing ones. """ - yield from self.storage.revision_metadata_missing(( + yield from self.idx_storage.revision_metadata_missing(( { 'id': sha1_git, 'indexer_configuration_id': self.tool['id'], } for sha1_git in sha1_gits )) def index(self, rev): """Index rev by processing it and organizing result. use metadata_detector to iterate on filenames - if one filename detected -> sends file to content indexer - if multiple file detected -> translation needed at revision level Args: rev (bytes): revision artifact from storage Returns: dict: dictionary representing a revision_metadata, with keys: - id (bytes): rev's identifier (sha1_git) - indexer_configuration_id (bytes): tool used - translated_metadata (bytes): dict of retrieved metadata """ try: result = { 'id': rev['id'], 'indexer_configuration_id': self.tool['id'], 'translated_metadata': None } root_dir = rev['directory'] dir_ls = self.storage.directory_ls(root_dir, recursive=False) files = (entry for entry in dir_ls if entry['type'] == 'file') detected_files = detect_metadata(files) result['translated_metadata'] = self.translate_revision_metadata( detected_files) except Exception as e: self.log.exception( 'Problem when indexing rev') return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
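[Editor's aside on the new 'storage' entry in RevisionMetadataIndexer.ADDITIONAL_CONFIG above: the revision-level indexer still needs the regular archive storage (revision_get, directory_ls) in addition to the new indexer storage, which appears to be what this entry is for. A hedged sketch of the two backend configurations involved, with values taken from this diff.]

# Two distinct backends (hedged sketch):
#  - the archive storage       -> revision_get, directory_ls
#  - the indexer storage       -> revision_metadata_*, content_metadata_*
archive_storage_config = {
    'cls': 'remote',
    'args': {'url': 'http://localhost:5002/'},
}
indexer_storage_config = {
    'cls': 'remote',
    'args': {'db': 'service=swh-indexer-dev'},
}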
Args: results ([dict]): list of content_mimetype, dict with the following keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ # TODO: add functions in storage to keep data in revision_metadata - self.storage.revision_metadata_add( + self.idx_storage.revision_metadata_add( results, conflict_update=(policy_update == 'update-dups')) def translate_revision_metadata(self, detected_files): """ Determine plan of action to translate metadata when containing one or multiple detected files: Args: detected_files (dict): dictionary mapping context names (e.g., "npm", "authors") to list of sha1 Returns: dict: dict with translated metadata according to the CodeMeta vocabulary """ translated_metadata = [] tool = { 'name': 'swh-metadata-translator', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': None }, } # TODO: iterate on each context, on each file # -> get raw_contents # -> translate each content config = { - 'storage': self.storage, + 'indexer_storage': self.idx_storage, 'objstorage': self.objstorage } for context in detected_files.keys(): tool['configuration']['context'] = context c_metadata_indexer = ContentMetadataIndexer(tool, config) # sha1s that are in content_metadata table sha1s_in_storage = [] - metadata_generator = self.storage.content_metadata_get( - detected_files[context]) + metadata_generator = self.idx_storage.content_metadata_get( + detected_files[context]) for c in metadata_generator: # extracting translated_metadata sha1 = c['id'] sha1s_in_storage.append(sha1) local_metadata = c['translated_metadata'] # local metadata is aggregated if local_metadata: translated_metadata.append(local_metadata) sha1s_filtered = [item for item in detected_files[context] if item not in sha1s_in_storage] if sha1s_filtered: # schedule indexation of content try: c_metadata_indexer.run(sha1s_filtered, policy_update='ignore-dups') # on the fly possibility: results = c_metadata_indexer.get_results() for result in results: local_metadata = result['translated_metadata'] translated_metadata.append(local_metadata) except Exception as e: self.log.warn("""Exception while indexing content""", e) # transform translated_metadata into min set with swh-metadata-detector min_metadata = extract_minimal_metadata_dict(translated_metadata) return min_metadata @click.command() @click.option('--revs', '-i', default=['8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', '026040ea79dec1b49b4e3e7beda9132b6b26b51b', '9699072e21eded4be8d45e3b8d543952533fa190'], help='Default sha1_git to lookup', multiple=True) def main(revs): _git_sha1s = list(map(hashutil.hash_to_bytes, revs)) rev_metadata_indexer = RevisionMetadataIndexer() rev_metadata_indexer.run(_git_sha1s, 'update-dups') if __name__ == '__main__': logging.basicConfig(level=logging.INFO) main() diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py index 5e2ee14..56a0e54 100644 --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -1,211 +1,210 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json def convert(raw_content): """ convert raw_content recursively: - from bytes to string - from string to dict Args: raw_content 
(bytes / string / dict) Returns: dict: content (if string was json, otherwise returns string) """ if isinstance(raw_content, bytes): return convert(raw_content.decode()) if isinstance(raw_content, str): try: content = json.loads(raw_content) if content: return content else: return raw_content except json.decoder.JSONDecodeError: return raw_content if isinstance(raw_content, dict): return raw_content class BaseMapping(): """Base class for mappings to inherit from To implement a new mapping: - inherit this class - add a local property self.mapping - override translate function """ def translate(self, content_dict): """ Tranlsates content by parsing content to a json object and translating with the npm mapping (for now hard_coded mapping) Args: context_text (text): should be json Returns: dict: translated metadata in jsonb form needed for the indexer """ translated_metadata = {} default = 'other' translated_metadata['other'] = {} try: for k, v in content_dict.items(): try: term = self.mapping.get(k, default) if term not in translated_metadata: translated_metadata[term] = v continue if isinstance(translated_metadata[term], str): in_value = translated_metadata[term] translated_metadata[term] = [in_value, v] continue if isinstance(translated_metadata[term], list): translated_metadata[term].append(v) continue if isinstance(translated_metadata[term], dict): translated_metadata[term][k] = v continue except KeyError: self.log.exception( "Problem during item mapping") continue except: return None return translated_metadata class NpmMapping(BaseMapping): """ dedicated class for NPM (package.json) mapping and translation """ mapping = { 'repository': 'codeRepository', 'os': 'operatingSystem', 'cpu': 'processorRequirements', 'engines': 'processorRequirements', 'dependencies': 'softwareRequirements', 'bundleDependencies': 'softwareRequirements', 'peerDependencies': 'softwareRequirements', 'author': 'author', 'contributor': 'contributor', 'keywords': 'keywords', 'license': 'license', 'version': 'version', 'description': 'description', 'name': 'name', 'devDependencies': 'softwareSuggestions', 'optionalDependencies': 'softwareSuggestions', 'bugs': 'issueTracker', 'homepage': 'url' } def translate(self, raw_content): content_dict = convert(raw_content) return super().translate(content_dict) class MavenMapping(BaseMapping): """ dedicated class for Maven (pom.xml) mapping and translation """ mapping = { 'license': 'license', 'version': 'version', 'description': 'description', 'name': 'name', 'prerequisites': 'softwareRequirements', 'repositories': 'codeRepository', 'groupId': 'identifier', 'ciManagement': 'contIntegration', 'issuesManagement': 'issueTracker', } def translate(self, raw_content): content = convert(raw_content) # parse content from xml to dict return super().translate(content) class DoapMapping(BaseMapping): mapping = { } def translate(self, raw_content): content = convert(raw_content) # parse content from xml to dict return super().translate(content) def parse_xml(content): """ Parses content from xml to a python dict Args: - content (text): the string form of the raw_content ( in xml) Returns: - parsed_xml (dict): a python dict of the content after parsing """ # check if xml # use xml parser to dict return content mapping_tool_fn = { "npm": NpmMapping(), "maven": MavenMapping(), "doap_xml": DoapMapping() } def compute_metadata(context, raw_content): """ first landing method: a dispatcher that sends content to the right function to carry out the real parsing of syntax and translation of terms 
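[Editor's aside on the npm mapping above: as a concrete illustration, here is what translating a tiny package.json-style dict looks like. It reimplements a simplified version of the per-key lookup (string/list accumulation omitted) rather than importing the indexer; the input values are hypothetical.]

npm_mapping = {
    'repository': 'codeRepository', 'author': 'author', 'license': 'license',
    'name': 'name', 'description': 'description', 'version': 'version',
    'dependencies': 'softwareRequirements', 'homepage': 'url',
}

package_json = {
    'name': 'test_name',
    'license': 'MIT',
    'dependencies': {'left-pad': '^1.0.0'},
    'unknown_term': 'ut',
}

translated = {'other': {}}
for key, value in package_json.items():
    term = npm_mapping.get(key, 'other')
    if term not in translated:
        translated[term] = value
    elif isinstance(translated[term], dict):
        translated[term][key] = value      # unmapped keys collect under 'other'

print(translated)
# {'other': {'unknown_term': 'ut'}, 'name': 'test_name',
#  'license': 'MIT', 'softwareRequirements': {'left-pad': '^1.0.0'}}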
Args: context (text): defines to which function/tool the content is sent content (text): the string form of the raw_content Returns: dict: translated metadata jsonb dictionary needed for the indexer to store in storage """ if raw_content is None or raw_content is b"": return None # TODO: keep mapping not in code (maybe fetch crosswalk from storage?) # if fetched from storage should be done once for batch of sha1s dictionary = mapping_tool_fn[context] translated_metadata = dictionary.translate(raw_content) - # print(translated_metadata) return translated_metadata def main(): raw_content = """{"name": "test_name", "unknown_term": "ut"}""" raw_content1 = b"""{"name": "test_name", "unknown_term": "ut", "prerequisites" :"packageXYZ"}""" result = compute_metadata("npm", raw_content) result1 = compute_metadata("maven", raw_content1) print(result) print(result1) if __name__ == "__main__": main() diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py index 8bbbf64..57bcd3a 100644 --- a/swh/indexer/mimetype.py +++ b/swh/indexer/mimetype.py @@ -1,158 +1,158 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import magic from swh.model import hashutil from swh.scheduler import utils from .indexer import ContentIndexer def compute_mimetype_encoding(raw_content): """Determine mimetype and encoding from the raw content. Args: raw_content (bytes): content's raw data Returns: A dict with mimetype and encoding key and corresponding values (as bytes). """ r = magic.detect_from_content(raw_content) return { 'mimetype': r.mime_type.encode('utf-8'), 'encoding': r.encoding.encode('utf-8'), } class ContentMimetypeIndexer(ContentIndexer): """Indexer in charge of: - filtering out content already indexed - reading content from objstorage per the content's id (sha1) - computing {mimetype, encoding} from that content - store result in storage """ ADDITIONAL_CONFIG = { 'destination_queue': ('str', None), 'tools': ('dict', { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" }, }), } CONFIG_BASE_FILENAME = 'indexer/mimetype' def prepare(self): super().prepare() destination_queue = self.config.get('destination_queue') if destination_queue: self.task_destination = utils.get_task(destination_queue) else: self.task_destination = None self.tool = self.tools[0] def filter(self, ids): """Filter out known sha1s and return only missing ones. """ - yield from self.storage.content_mimetype_missing(( + yield from self.idx_storage.content_mimetype_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: A dict, representing a content_mimetype, with keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes """ try: properties = compute_mimetype_encoding(data) properties.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], }) except TypeError: self.log.error('Detecting mimetype error for id %s' % ( hashutil.hash_to_hex(id), )) return None return properties def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
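[Editor's aside on compute_mimetype_encoding below: a standalone check of the detection it performs. This relies on the same binding the indexer's tool configuration declares (Debian's python3-magic, i.e. the bindings shipped with file(1), which expose magic.detect_from_content); the sample content is hypothetical.]

import magic

raw_content = b'#!/bin/sh\necho hello\n'
r = magic.detect_from_content(raw_content)
print({'mimetype': r.mime_type.encode('utf-8'),
       'encoding': r.encoding.encode('utf-8')})
# e.g. {'mimetype': b'text/x-shellscript', 'encoding': b'us-ascii'}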
Args: results ([dict]): list of content_mimetype, dict with the following keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ - self.storage.content_mimetype_add( + self.idx_storage.content_mimetype_add( results, conflict_update=(policy_update == 'update-dups')) def _filter_text(self, results): """Filter sha1 whose raw content is text. """ for result in results: if b'binary' in result['encoding']: continue yield result['id'] def next_step(self, results): """When the computations is done, we'd like to send over only text contents to the text content orchestrator. Args: results ([dict]): List of content_mimetype results, dict with the following keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes """ if self.task_destination: self.task_destination.delay(list(self._filter_text(results))) @click.command() @click.option('--path', help="Path to execute index on") def main(path): with open(path, 'rb') as f: raw_content = f.read() print(compute_mimetype_encoding(raw_content)) if __name__ == '__main__': main() diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py new file mode 100644 index 0000000..8325954 --- /dev/null +++ b/swh/indexer/storage/__init__.py @@ -0,0 +1,521 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +import json +import dateutil.parser +import psycopg2 + +from swh.storage.common import db_transaction_generator, db_transaction +from swh.storage.exc import StorageDBError +from .db import Db + +from . import converters + + +class IndexerStorage(): + """SWH Indexer Storage + + """ + def __init__(self, db): + """ + Args: + db_conn: either a libpq connection string, or a psycopg2 connection + obj_root: path to the root of the object storage + + """ + try: + if isinstance(db, psycopg2.extensions.connection): + self.db = Db(db) + else: + self.db = Db.connect(db) + except psycopg2.OperationalError as e: + raise StorageDBError(e) + + def check_config(self, *, check_write): + """Check that the storage is configured and ready to go.""" + # Check permissions on one of the tables + with self.db.transaction() as cur: + if check_write: + check = 'INSERT' + else: + check = 'SELECT' + + cur.execute( + "select has_table_privilege(current_user, 'content_mimetype', %s)", # noqa + (check,) + ) + return cur.fetchone()[0] + + return True + + @db_transaction_generator + def content_mimetype_missing(self, mimetypes, cur=None): + """List mimetypes missing from storage. 
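[Editor's aside on the new IndexerStorage: it can also be exercised directly. A minimal hedged sketch of instantiating the local backend and running the permission check shown above; it assumes swh.indexer is installed and that the softwareheritage-indexer-dev database from sql/Makefile exists, so it is not runnable without that environment.]

from swh.indexer.storage import IndexerStorage

storage = IndexerStorage(db='dbname=softwareheritage-indexer-dev')
print('writable:', storage.check_config(check_write=True))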
+ + Args: + mimetypes (iterable): iterable of dict with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: an iterable of missing id for the triplets id, tool_name, + tool_version + + """ + db = self.db + db.mktemp_content_mimetype_missing(cur) + db.copy_to(mimetypes, 'tmp_content_mimetype_missing', + ['id', 'indexer_configuration_id'], + cur) + for obj in db.content_mimetype_missing_from_temp(cur): + yield obj[0] + + @db_transaction + def content_mimetype_add(self, mimetypes, conflict_update=False, cur=None): + """Add mimetypes not present in storage. + + Args: + mimetypes (iterable): dictionaries with keys: + + - id (bytes): sha1 identifier + - mimetype (bytes): raw content's mimetype + - encoding (bytes): raw content's encoding + - indexer_configuration_id (int): tool's id used to + compute the results + - conflict_update: Flag to determine if we want to + overwrite (true) or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_content_mimetype(cur) + db.copy_to(mimetypes, 'tmp_content_mimetype', + ['id', 'mimetype', 'encoding', 'indexer_configuration_id'], + cur) + db.content_mimetype_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_mimetype_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_mimetype_get_from_temp(): + yield converters.db_to_mimetype( + dict(zip(db.content_mimetype_cols, c))) + + @db_transaction_generator + def content_language_missing(self, languages, cur=None): + """List languages missing from storage. + + Args: + languages (iterable): dictionaries with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: identifiers of missing languages + + """ + db = self.db + db.mktemp_content_language_missing(cur) + db.copy_to(languages, 'tmp_content_language_missing', + ['id', 'indexer_configuration_id'], cur) + for obj in db.content_language_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def content_language_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_language_get_from_temp(): + yield converters.db_to_language( + dict(zip(db.content_language_cols, c))) + + @db_transaction + def content_language_add(self, languages, conflict_update=False, cur=None): + """Add languages not present in storage. + + Args: + languages (iterable): dictionaries with keys: + + - id: sha1 + - lang: bytes + + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_content_language(cur) + # empty language is mapped to 'unknown' + db.copy_to( + ({ + 'id': l['id'], + 'lang': 'unknown' if not l['lang'] else l['lang'], + 'indexer_configuration_id': l['indexer_configuration_id'], + } for l in languages), + 'tmp_content_language', + ['id', 'lang', 'indexer_configuration_id'], cur) + + db.content_language_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_ctags_missing(self, ctags, cur=None): + """List ctags missing from storage. 
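[Editor's aside, continuing the previous sketch: the mimetype round-trip through the API. The sha1 and tool id are hypothetical, and the tool id must reference a row previously created via indexer_configuration_add; shapes follow the docstrings above.]

from swh.indexer.storage import IndexerStorage

storage = IndexerStorage(db='dbname=softwareheritage-indexer-dev')   # as above
sha1 = b'\x12' * 20                        # hypothetical content identifier
tool_id = 1                                # must be an existing indexer_configuration id

storage.content_mimetype_add([{
    'id': sha1,
    'mimetype': b'text/plain',
    'encoding': b'us-ascii',
    'indexer_configuration_id': tool_id,
}], conflict_update=False)

missing = list(storage.content_mimetype_missing(
    [{'id': sha1, 'indexer_configuration_id': tool_id}]))
print('still missing:', missing)           # expected: []

for row in storage.content_mimetype_get([sha1]):
    print(row['mimetype'], row['tool']['name'])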
+ + Args: + ctags (iterable): dicts with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool name used + - tool_version (str): associated version + + Returns: + an iterable of missing id + + """ + db = self.db + + db.mktemp_content_ctags_missing(cur) + db.copy_to(ctags, + tblname='tmp_content_ctags_missing', + columns=['id', 'indexer_configuration_id'], + cur=cur) + for obj in db.content_ctags_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def content_ctags_get(self, ids, cur=None): + """Retrieve ctags per id. + + Args: + ids (iterable): sha1 checksums + + """ + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_ctags_get_from_temp(): + yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c))) + + @db_transaction + def content_ctags_add(self, ctags, conflict_update=False, cur=None): + """Add ctags not present in storage + + Args: + ctags (iterable): dictionaries with keys: + + - id (bytes): sha1 + - ctags ([list): List of dictionary with keys: name, kind, + line, language + + """ + db = self.db + + def _convert_ctags(__ctags): + """Convert ctags dict to list of ctags. + + """ + for ctags in __ctags: + yield from converters.ctags_to_db(ctags) + + db.mktemp_content_ctags(cur) + db.copy_to(list(_convert_ctags(ctags)), + tblname='tmp_content_ctags', + columns=['id', 'name', 'kind', 'line', + 'lang', 'indexer_configuration_id'], + cur=cur) + + db.content_ctags_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_ctags_search(self, expression, + limit=10, last_sha1=None, cur=None): + """Search through content's raw ctags symbols. + + Args: + expression (str): Expression to search for + limit (int): Number of rows to return (default to 10). + last_sha1 (str): Offset from which retrieving data (default to ''). + + Yields: + rows of ctags including id, name, lang, kind, line, etc... + + """ + db = self.db + + for obj in db.content_ctags_search(expression, last_sha1, limit, + cur=cur): + yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj))) + + @db_transaction_generator + def content_fossology_license_get(self, ids, cur=None): + """Retrieve licenses per id. + + Args: + ids (iterable): sha1 checksums + + Yields: + list: dictionaries with the following keys: + + - id (bytes) + - licenses ([str]): associated licenses for that content + + """ + db = self.db + db.store_tmp_bytea(ids, cur) + + for c in db.content_fossology_license_get_from_temp(): + license = dict(zip(db.content_fossology_license_cols, c)) + yield converters.db_to_fossology_license(license) + + @db_transaction + def content_fossology_license_add(self, licenses, + conflict_update=False, cur=None): + """Add licenses not present in storage. 
+ + Args: + licenses (iterable): dictionaries with keys: + + - id: sha1 + - license ([bytes]): List of licenses associated to sha1 + - tool (str): nomossa + + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + Returns: + list: content_license entries which failed due to unknown licenses + + """ + db = self.db + + # Then, we add the correct ones + db.mktemp_content_fossology_license(cur) + db.copy_to( + ({ + 'id': sha1['id'], + 'indexer_configuration_id': sha1['indexer_configuration_id'], + 'license': license, + } for sha1 in licenses + for license in sha1['licenses']), + tblname='tmp_content_fossology_license', + columns=['id', 'license', 'indexer_configuration_id'], + cur=cur) + db.content_fossology_license_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_metadata_missing(self, metadatas, cur=None): + """List metadatas missing from storage. + + Args: + metadatas (iterable): dictionaries with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: missing ids + + """ + db = self.db + db.mktemp_content_metadata_missing(cur) + db.copy_to(metadatas, 'tmp_content_metadata_missing', + ['id', 'indexer_configuration_id'], cur) + for obj in db.content_metadata_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def content_metadata_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_metadata_get_from_temp(): + yield converters.db_to_metadata( + dict(zip(db.content_metadata_cols, c))) + + @db_transaction + def content_metadata_add(self, metadatas, conflict_update=False, cur=None): + """Add metadatas not present in storage. + + Args: + metadatas (iterable): dictionaries with keys: + + - id: sha1 + - translated_metadata: bytes / jsonb ? + + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_content_metadata(cur) + # empty metadata is mapped to 'unknown' + + db.copy_to(metadatas, 'tmp_content_metadata', + ['id', 'translated_metadata', 'indexer_configuration_id'], + cur) + db.content_metadata_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def revision_metadata_missing(self, metadatas, cur=None): + """List metadatas missing from storage. + + Args: + metadatas (iterable): dictionaries with keys: + + - id (bytes): sha1_git revision identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: missing ids + + """ + db = self.db + db.mktemp_revision_metadata_missing(cur) + db.copy_to(metadatas, 'tmp_revision_metadata_missing', + ['id', 'indexer_configuration_id'], cur) + for obj in db.revision_metadata_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def revision_metadata_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.revision_metadata_get_from_temp(): + yield converters.db_to_metadata( + dict(zip(db.revision_metadata_cols, c))) + + @db_transaction + def revision_metadata_add(self, metadatas, + conflict_update=False, cur=None): + """Add metadatas not present in storage. + + Args: + metadatas (iterable): dictionaries with keys: + + - id: sha1_git of revision + - translated_metadata: bytes / jsonb ? 
+ + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_revision_metadata(cur) + # empty metadata is mapped to 'unknown' + + db.copy_to(metadatas, 'tmp_revision_metadata', + ['id', 'translated_metadata', 'indexer_configuration_id'], + cur) + db.revision_metadata_add_from_temp(conflict_update, cur) + + @db_transaction + def origin_metadata_add(self, origin_id, ts, provider, tool, metadata, + cur=None): + """ Add an origin_metadata for the origin at ts with provenance and + metadata. + + Args: + origin_id (int): the origin's id for which the metadata is added + ts (datetime): timestamp of the found metadata + provider (int): the provider of metadata (ex:'hal') + tool (int): tool used to extract metadata + metadata (jsonb): the metadata retrieved at the time and location + + Returns: + id (int): the origin_metadata unique id + """ + if isinstance(ts, str): + ts = dateutil.parser.parse(ts) + + return self.db.origin_metadata_add(origin_id, ts, provider, tool, + metadata, cur) + + @db_transaction_generator + def origin_metadata_get_by(self, origin_id, provider_type=None, cur=None): + """Retrieve list of all origin_metadata entries for the origin_id + + Args: + origin_id (int): the unique origin identifier + provider_type (str): (optional) type of provider + + Returns: + list of dicts: the origin_metadata dictionary with the keys: + + - id (int): origin_metadata's id + - origin_id (int): origin's id + - discovery_date (datetime): timestamp of discovery + - tool_id (int): metadata's extracting tool + - metadata (jsonb) + - provider_id (int): metadata's provider + - provider_name (str) + - provider_type (str) + - provider_url (str) + + """ + db = self.db + for line in db.origin_metadata_get_by(origin_id, provider_type, cur): + yield dict(zip(db.origin_metadata_get_cols, line)) + + @db_transaction_generator + def indexer_configuration_add(self, tools, cur=None): + """Add new tools to the storage. + + Args: + tools ([dict]): List of dictionary representing tool to + insert in the db. Dictionary with the following keys:: + + tool_name (str): tool's name + tool_version (str): tool's version + tool_configuration (dict): tool's configuration (free form + dict) + + Returns: + List of dict inserted in the db (holding the id key as + well). The order of the list is not guaranteed to match + the order of the initial list. + + """ + db = self.db + db.mktemp_indexer_configuration(cur) + db.copy_to(tools, 'tmp_indexer_configuration', + ['tool_name', 'tool_version', 'tool_configuration'], + cur) + + tools = db.indexer_configuration_add_from_temp(cur) + for line in tools: + yield dict(zip(db.indexer_configuration_cols, line)) + + @db_transaction + def indexer_configuration_get(self, tool, cur=None): + """Retrieve tool information. + + Args: + tool (dict): Dictionary representing a tool with the + following keys:: + + tool_name (str): tool's name + tool_version (str): tool's version + tool_configuration (dict): tool's configuration (free form + dict) + + Returns: + The identifier of the tool if it exists, None otherwise. 
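[Editor's aside on tool registration: indexer_configuration_add is what produces the indexer_configuration_id used by every *_add call above. A hedged sketch using the tool_-prefixed keys that BaseIndexer._prepare_tool produces; the configuration values are hypothetical and the same local database setup as before is assumed.]

from swh.indexer.storage import IndexerStorage

storage = IndexerStorage(db='dbname=softwareheritage-indexer-dev')

tool = {
    'tool_name': 'nomos',
    'tool_version': '3.1.0rc2-31-ga2cbb8c',
    'tool_configuration': {'command_line': 'nomossa'},   # hypothetical configuration
}

registered = list(storage.indexer_configuration_add([tool]))
print(registered[0]['id'])                 # the id to pass as indexer_configuration_id

# The same dict can be used to look the tool up again later:
print(storage.indexer_configuration_get(tool))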
+ + """ + db = self.db + tool_conf = tool['tool_configuration'] + if isinstance(tool_conf, dict): + tool_conf = json.dumps(tool_conf) + idx = db.indexer_configuration_get(tool['tool_name'], + tool['tool_version'], + tool_conf) + if not idx: + return None + return dict(zip(self.db.indexer_configuration_cols, idx)) diff --git a/swh/indexer/storage/api/__init__.py b/swh/indexer/storage/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/indexer/storage/api/client.py b/swh/indexer/storage/api/client.py new file mode 100644 index 0000000..e6a87a9 --- /dev/null +++ b/swh/indexer/storage/api/client.py @@ -0,0 +1,100 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +from swh.core.api import SWHRemoteAPI + +from swh.storage.exc import StorageAPIError + + +class RemoteStorage(SWHRemoteAPI): + """Proxy to a remote storage API""" + def __init__(self, url): + super().__init__(api_exception=StorageAPIError, url=url) + + def check_config(self, *, check_write): + return self.post('check_config', {'check_write': check_write}) + + def content_mimetype_add(self, mimetypes, conflict_update=False): + return self.post('content_mimetype/add', { + 'mimetypes': mimetypes, + 'conflict_update': conflict_update, + }) + + def content_mimetype_missing(self, mimetypes): + return self.post('content_mimetype/missing', {'mimetypes': mimetypes}) + + def content_mimetype_get(self, ids): + return self.post('content_mimetype', {'ids': ids}) + + def content_language_add(self, languages, conflict_update=False): + return self.post('content_language/add', { + 'languages': languages, + 'conflict_update': conflict_update, + }) + + def content_language_missing(self, languages): + return self.post('content_language/missing', {'languages': languages}) + + def content_language_get(self, ids): + return self.post('content_language', {'ids': ids}) + + def content_ctags_add(self, ctags, conflict_update=False): + return self.post('content/ctags/add', { + 'ctags': ctags, + 'conflict_update': conflict_update, + }) + + def content_ctags_missing(self, ctags): + return self.post('content/ctags/missing', {'ctags': ctags}) + + def content_ctags_get(self, ids): + return self.post('content/ctags', {'ids': ids}) + + def content_ctags_search(self, expression, limit=10, last_sha1=None): + return self.post('content/ctags/search', { + 'expression': expression, + 'limit': limit, + 'last_sha1': last_sha1, + }) + + def content_fossology_license_add(self, licenses, conflict_update=False): + return self.post('content/fossology_license/add', { + 'licenses': licenses, + 'conflict_update': conflict_update, + }) + + def content_fossology_license_get(self, ids): + return self.post('content/fossology_license', {'ids': ids}) + + def content_metadata_add(self, metadatas, conflict_update=False): + return self.post('content_metadata/add', { + 'metadatas': metadatas, + 'conflict_update': conflict_update, + }) + + def content_metadata_missing(self, metadatas): + return self.post('content_metadata/missing', {'metadatas': metadatas}) + + def content_metadata_get(self, ids): + return self.post('content_metadata', {'ids': ids}) + + def revision_metadata_add(self, metadatas, conflict_update=False): + return self.post('revision_metadata/add', { + 'metadatas': metadatas, + 'conflict_update': conflict_update, + }) + + def 
revision_metadata_missing(self, metadatas): + return self.post('revision_metadata/missing', {'metadatas': metadatas}) + + def revision_metadata_get(self, ids): + return self.post('revision_metadata', {'ids': ids}) + + def indexer_configuration_add(self, tools): + return self.post('indexer_configuration/add', {'tools': tools}) + + def indexer_configuration_get(self, tool): + return self.post('indexer_configuration/data', {'tool': tool}) diff --git a/swh/indexer/storage/api/server.py b/swh/indexer/storage/api/server.py new file mode 100644 index 0000000..5ebacf2 --- /dev/null +++ b/swh/indexer/storage/api/server.py @@ -0,0 +1,197 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import logging +import click + +from flask import g, request + +from swh.core import config +from swh.core.api import (SWHServerAPIApp, decode_request, + error_handler, + encode_data_server as encode_data) +from swh.indexer import get_indexer_storage + + +DEFAULT_CONFIG = { + 'storage': ('dict', { + 'cls': 'local', + 'args': { + 'db': 'dbname=softwareheritage-indexer-dev', + }, + }) +} + + +app = SWHServerAPIApp(__name__) + + +@app.errorhandler(Exception) +def my_error_handler(exception): + return error_handler(exception, encode_data) + + +@app.before_request +def before_request(): + g.storage = get_indexer_storage(**app.config['storage']) + + +@app.route('/') +def index(): + return 'SWH Indexer Storage API server' + + +@app.route('/check_config', methods=['POST']) +def check_config(): + return encode_data(g.storage.check_config(**decode_request(request))) + + +@app.route('/content_mimetype/add', methods=['POST']) +def content_mimetype_add(): + return encode_data( + g.storage.content_mimetype_add(**decode_request(request))) + + +@app.route('/content_mimetype/missing', methods=['POST']) +def content_mimetype_missing(): + return encode_data( + g.storage.content_mimetype_missing(**decode_request(request))) + + +@app.route('/content_mimetype', methods=['POST']) +def content_mimetype_get(): + return encode_data( + g.storage.content_mimetype_get(**decode_request(request))) + + +@app.route('/content_language/add', methods=['POST']) +def content_language_add(): + return encode_data( + g.storage.content_language_add(**decode_request(request))) + + +@app.route('/content_language/missing', methods=['POST']) +def content_language_missing(): + return encode_data( + g.storage.content_language_missing(**decode_request(request))) + + +@app.route('/content_language', methods=['POST']) +def content_language_get(): + return encode_data( + g.storage.content_language_get(**decode_request(request))) + + +@app.route('/content/ctags/add', methods=['POST']) +def content_ctags_add(): + return encode_data( + g.storage.content_ctags_add(**decode_request(request))) + + +@app.route('/content/ctags/search', methods=['POST']) +def content_ctags_search(): + return encode_data( + g.storage.content_ctags_search(**decode_request(request))) + + +@app.route('/content/ctags/missing', methods=['POST']) +def content_ctags_missing(): + return encode_data( + g.storage.content_ctags_missing(**decode_request(request))) + + +@app.route('/content/ctags', methods=['POST']) +def content_ctags_get(): + return encode_data( + g.storage.content_ctags_get(**decode_request(request))) + + +@app.route('/content/fossology_license/add', methods=['POST']) +def 
content_fossology_license_add(): + return encode_data( + g.storage.content_fossology_license_add(**decode_request(request))) + + +@app.route('/content/fossology_license', methods=['POST']) +def content_fossology_license_get(): + return encode_data( + g.storage.content_fossology_license_get(**decode_request(request))) + + +@app.route('/indexer_configuration/data', methods=['POST']) +def indexer_configuration_get(): + return encode_data(g.storage.indexer_configuration_get( + **decode_request(request))) + + +@app.route('/indexer_configuration/add', methods=['POST']) +def indexer_configuration_add(): + return encode_data(g.storage.indexer_configuration_add( + **decode_request(request))) + + +@app.route('/content_metadata/add', methods=['POST']) +def content_metadata_add(): + return encode_data( + g.storage.content_metadata_add(**decode_request(request))) + + +@app.route('/content_metadata/missing', methods=['POST']) +def content_metadata_missing(): + return encode_data( + g.storage.content_metadata_missing(**decode_request(request))) + + +@app.route('/content_metadata', methods=['POST']) +def content_metadata_get(): + return encode_data( + g.storage.content_metadata_get(**decode_request(request))) + + +@app.route('/revision_metadata/add', methods=['POST']) +def revision_metadata_add(): + return encode_data( + g.storage.revision_metadata_add(**decode_request(request))) + + +@app.route('/revision_metadata/missing', methods=['POST']) +def revision_metadata_missing(): + return encode_data( + g.storage.revision_metadata_missing(**decode_request(request))) + + +@app.route('/revision_metadata', methods=['POST']) +def revision_metadata_get(): + return encode_data( + g.storage.revision_metadata_get(**decode_request(request))) + + +def run_from_webserver(environ, start_response): + """Run the WSGI app from the webserver, loading the configuration.""" + + config_path = '/etc/softwareheritage/indexer/storage.yml' + + app.config.update(config.read(config_path, DEFAULT_CONFIG)) + + handler = logging.StreamHandler() + app.logger.addHandler(handler) + + return app(environ, start_response) + + +@click.command() +@click.argument('config-path', required=1) +@click.option('--host', default='0.0.0.0', help="Host to run the server") +@click.option('--port', default=5007, type=click.INT, + help="Binding port of the server") +@click.option('--debug/--nodebug', default=True, + help="Indicates if the server should run in debug mode") +def launch(config_path, host, port, debug): + app.config.update(config.read(config_path, DEFAULT_CONFIG)) + app.run(host, port=int(port), debug=bool(debug)) + + +if __name__ == '__main__': + launch() diff --git a/swh/indexer/storage/converters.py b/swh/indexer/storage/converters.py new file mode 100644 index 0000000..db7a295 --- /dev/null +++ b/swh/indexer/storage/converters.py @@ -0,0 +1,140 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def ctags_to_db(ctags): + """Convert a ctags entry into a ready ctags entry. 
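[Editor's aside on the HTTP layer: with the Flask server above running (its launch command binds to port 5007 by default), the RemoteStorage client added earlier in this diff can be pointed at it. A hedged sketch with a hypothetical URL and hypothetical ids; it assumes a deployed indexer storage API.]

from swh.indexer.storage.api.client import RemoteStorage

storage = RemoteStorage('http://localhost:5007/')      # hypothetical local deployment

print(storage.check_config(check_write=False))
missing = storage.content_mimetype_missing(
    [{'id': b'\x56' * 20, 'indexer_configuration_id': 1}])   # hypothetical values
print(list(missing))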
+
+    Args:
+        ctags (dict): ctags entry with the following keys:
+
+            - id (bytes): content's identifier
+            - indexer_configuration_id (int): tool id used to compute
+              the ctags
+            - ctags ([dict]): list of dictionaries with the following keys:
+
+                - name (str): symbol's name
+                - kind (str): symbol's kind
+                - line (int): symbol's line in the content
+                - lang (str): language
+
+    Yields:
+        dict: ctags entries ready for the database, with the following keys:
+
+            - id (bytes): content's identifier
+            - name (str): symbol's name
+            - kind (str): symbol's kind
+            - line (int): symbol's line in the content
+            - lang (str): language for that content
+            - indexer_configuration_id (int): tool id used to compute
+              the ctags
+
+    """
+    id = ctags['id']
+    tool_id = ctags['indexer_configuration_id']
+    for ctag in ctags['ctags']:
+        yield {
+            'id': id,
+            'name': ctag['name'],
+            'kind': ctag['kind'],
+            'line': ctag['line'],
+            'lang': ctag['lang'],
+            'indexer_configuration_id': tool_id,
+        }
+
+
+def db_to_ctags(ctag):
+    """Convert a ctags row from the database into a ready ctags entry.
+
+    Args:
+        ctag (dict): ctags entry as read from the database, with the
+            following keys:
+
+            - id (bytes): content's identifier
+            - name (str): symbol's name
+            - kind (str): symbol's kind
+            - line (int): symbol's line in the content
+            - lang (str): language
+            - tool_id, tool_name, tool_version, tool_configuration:
+              columns describing the tool used to compute the ctags
+
+    Returns:
+        dict: ready ctags entry with the following keys:
+
+            - id (bytes): content's identifier
+            - name (str): symbol's name
+            - kind (str): symbol's kind
+            - line (int): symbol's line in the content
+            - lang (str): language for that content
+            - tool (dict): tool used to compute the ctags
+
+    """
+    return {
+        'id': ctag['id'],
+        'name': ctag['name'],
+        'kind': ctag['kind'],
+        'line': ctag['line'],
+        'lang': ctag['lang'],
+        'tool': {
+            'id': ctag['tool_id'],
+            'name': ctag['tool_name'],
+            'version': ctag['tool_version'],
+            'configuration': ctag['tool_configuration']
+        }
+    }
+
+
+def db_to_mimetype(mimetype):
+    """Convert a mimetype entry into a ready mimetype output.
+
+    """
+    return {
+        'id': mimetype['id'],
+        'encoding': mimetype['encoding'],
+        'mimetype': mimetype['mimetype'],
+        'tool': {
+            'id': mimetype['tool_id'],
+            'name': mimetype['tool_name'],
+            'version': mimetype['tool_version'],
+            'configuration': mimetype['tool_configuration']
+        }
+    }
+
+
+def db_to_language(language):
+    """Convert a language entry into a ready language output.
+
+    """
+    return {
+        'id': language['id'],
+        'lang': language['lang'],
+        'tool': {
+            'id': language['tool_id'],
+            'name': language['tool_name'],
+            'version': language['tool_version'],
+            'configuration': language['tool_configuration']
+        }
+    }
+
+
+def db_to_metadata(metadata):
+    """Convert a metadata entry into a ready metadata output.
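+
+    Args:
+        metadata (dict): database row with the keys id, translated_metadata
+            and the tool_id, tool_name, tool_version, tool_configuration
+            columns
+
+    Returns:
+        dict: the same data with the tool columns grouped under a single
+        'tool' key: {'id': ..., 'translated_metadata': ...,
+        'tool': {'id': ..., 'name': ..., 'version': ..., 'configuration': ...}}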
+ + """ + return { + 'id': metadata['id'], + 'translated_metadata': metadata['translated_metadata'], + 'tool': { + 'id': metadata['tool_id'], + 'name': metadata['tool_name'], + 'version': metadata['tool_version'], + 'configuration': metadata['tool_configuration'] + } + } + + +def db_to_fossology_license(license): + return { + 'id': license['id'], + 'licenses': license['licenses'], + 'tool': { + 'id': license['tool_id'], + 'name': license['tool_name'], + 'version': license['tool_version'], + 'configuration': license['tool_configuration'], + } + } diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py new file mode 100644 index 0000000..b51402e --- /dev/null +++ b/swh/indexer/storage/db.py @@ -0,0 +1,245 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.model import hashutil + +from swh.storage.db import BaseDb, stored_procedure, cursor_to_bytes +from swh.storage.db import line_to_bytes + + +class Db(BaseDb): + """Proxy to the SWH Indexer DB, with wrappers around stored procedures + + """ + @stored_procedure('swh_mktemp_bytea') + def mktemp_bytea(self, cur=None): pass + + def store_tmp_bytea(self, ids, cur=None): + """Store the given identifiers in a new tmp_bytea table""" + cur = self._cursor(cur) + + self.mktemp_bytea(cur) + self.copy_to(({'id': elem} for elem in ids), 'tmp_bytea', + ['id'], cur) + + content_mimetype_cols = [ + 'id', 'mimetype', 'encoding', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_mimetype_missing') + def mktemp_content_mimetype_missing(self, cur=None): pass + + def content_mimetype_missing_from_temp(self, cur=None): + """List missing mimetypes. + + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_mimetype_missing()") + yield from cursor_to_bytes(cur) + + @stored_procedure('swh_mktemp_content_mimetype') + def mktemp_content_mimetype(self, cur=None): pass + + def content_mimetype_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_mimetype_add(%s)", + (conflict_update, )) + + def content_mimetype_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_mimetype_get()" % ( + ','.join(self.content_mimetype_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + content_language_cols = [ + 'id', 'lang', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_language') + def mktemp_content_language(self, cur=None): pass + + @stored_procedure('swh_mktemp_content_language_missing') + def mktemp_content_language_missing(self, cur=None): pass + + def content_language_missing_from_temp(self, cur=None): + """List missing languages. 
+ + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_language_missing()") + yield from cursor_to_bytes(cur) + + def content_language_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_language_add(%s)", + (conflict_update, )) + + def content_language_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_language_get()" % ( + ','.join(self.content_language_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + content_ctags_cols = [ + 'id', 'name', 'kind', 'line', 'lang', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_ctags') + def mktemp_content_ctags(self, cur=None): pass + + @stored_procedure('swh_mktemp_content_ctags_missing') + def mktemp_content_ctags_missing(self, cur=None): pass + + def content_ctags_missing_from_temp(self, cur=None): + """List missing ctags. + + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_ctags_missing()") + yield from cursor_to_bytes(cur) + + def content_ctags_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_ctags_add(%s)", + (conflict_update, )) + + def content_ctags_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_ctags_get()" % ( + ','.join(self.content_ctags_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + def content_ctags_search(self, expression, last_sha1, limit, cur=None): + cur = self._cursor(cur) + if not last_sha1: + query = """SELECT %s + FROM swh_content_ctags_search(%%s, %%s)""" % ( + ','.join(self.content_ctags_cols)) + cur.execute(query, (expression, limit)) + else: + if last_sha1 and isinstance(last_sha1, bytes): + last_sha1 = '\\x%s' % hashutil.hash_to_hex(last_sha1) + elif last_sha1: + last_sha1 = '\\x%s' % last_sha1 + + query = """SELECT %s + FROM swh_content_ctags_search(%%s, %%s, %%s)""" % ( + ','.join(self.content_ctags_cols)) + cur.execute(query, (expression, limit, last_sha1)) + + yield from cursor_to_bytes(cur) + + content_fossology_license_cols = [ + 'id', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration', + 'licenses'] + + @stored_procedure('swh_mktemp_content_fossology_license') + def mktemp_content_fossology_license(self, cur=None): pass + + def content_fossology_license_add_from_temp(self, conflict_update, + cur=None): + """Add new licenses per content. + + """ + self._cursor(cur).execute( + "SELECT swh_content_fossology_license_add(%s)", + (conflict_update, )) + + def content_fossology_license_get_from_temp(self, cur=None): + """Retrieve licenses per content. + + """ + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_fossology_license_get()" % ( + ','.join(self.content_fossology_license_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + content_metadata_cols = [ + 'id', 'translated_metadata', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_metadata') + def mktemp_content_metadata(self, cur=None): pass + + @stored_procedure('swh_mktemp_content_metadata_missing') + def mktemp_content_metadata_missing(self, cur=None): pass + + def content_metadata_missing_from_temp(self, cur=None): + """List missing metadatas. 
+ + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_metadata_missing()") + yield from cursor_to_bytes(cur) + + def content_metadata_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_metadata_add(%s)", + (conflict_update, )) + + def content_metadata_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_metadata_get()" % ( + ','.join(self.content_metadata_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + revision_metadata_cols = [ + 'id', 'translated_metadata', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_revision_metadata') + def mktemp_revision_metadata(self, cur=None): pass + + @stored_procedure('swh_mktemp_revision_metadata_missing') + def mktemp_revision_metadata_missing(self, cur=None): pass + + def revision_metadata_missing_from_temp(self, cur=None): + """List missing metadatas. + + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_revision_metadata_missing()") + yield from cursor_to_bytes(cur) + + def revision_metadata_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)", + (conflict_update, )) + + def revision_metadata_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_revision_metadata_get()" % ( + ','.join(self.revision_metadata_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + indexer_configuration_cols = ['id', 'tool_name', 'tool_version', + 'tool_configuration'] + + @stored_procedure('swh_mktemp_indexer_configuration') + def mktemp_indexer_configuration(self, cur=None): + pass + + def indexer_configuration_add_from_temp(self, cur=None): + cur = self._cursor(cur) + cur.execute("SELECT %s from swh_indexer_configuration_add()" % ( + ','.join(self.indexer_configuration_cols), )) + yield from cursor_to_bytes(cur) + + def indexer_configuration_get(self, tool_name, + tool_version, tool_configuration, cur=None): + cur = self._cursor(cur) + cur.execute('''select %s + from indexer_configuration + where tool_name=%%s and + tool_version=%%s and + tool_configuration=%%s''' % ( + ','.join(self.indexer_configuration_cols)), + (tool_name, tool_version, tool_configuration)) + + data = cur.fetchone() + if not data: + return None + return line_to_bytes(data) diff --git a/swh/indexer/tests/__init__.py b/swh/indexer/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/indexer/tests/storage/__init__.py b/swh/indexer/tests/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/indexer/tests/storage/test_api_client.py b/swh/indexer/tests/storage/test_api_client.py new file mode 100644 index 0000000..9e47975 --- /dev/null +++ b/swh/indexer/tests/storage/test_api_client.py @@ -0,0 +1,36 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from .test_storage import CommonTestStorage +from swh.storage.tests.server_testing import ServerTestFixture +from swh.indexer.storage.api.client import RemoteStorage +from swh.indexer.storage.api.server import app + + +class TestRemoteStorage(CommonTestStorage, ServerTestFixture, + unittest.TestCase): + """Test the indexer's remote storage API. 
+ + This class doesn't define any tests as we want identical + functionality between local and remote storage. All the tests are + therefore defined in + `class`:swh.indexer.storage.test_storage.CommonTestStorage. + + """ + + def setUp(self): + self.config = { + 'storage': { + 'cls': 'local', + 'args': { + 'db': 'dbname=%s' % self.TEST_STORAGE_DB_NAME, + } + } + } + self.app = app + super().setUp() + self.storage = RemoteStorage(self.url()) diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py new file mode 100644 index 0000000..89946d4 --- /dev/null +++ b/swh/indexer/tests/storage/test_converters.py @@ -0,0 +1,199 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from nose.tools import istest +from nose.plugins.attrib import attr + +from swh.indexer.storage import converters + + +@attr('!db') +class TestConverters(unittest.TestCase): + def setUp(self): + self.maxDiff = None + + @istest + def ctags_to_db(self): + input_ctag = { + 'id': b'some-id', + 'indexer_configuration_id': 100, + 'ctags': [ + { + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + }, { + 'name': 'main', + 'kind': 'function', + 'line': 12, + 'lang': 'Yaml', + }, + ] + } + + expected_ctags = [ + { + 'id': b'some-id', + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + 'indexer_configuration_id': 100, + }, { + 'id': b'some-id', + 'name': 'main', + 'kind': 'function', + 'line': 12, + 'lang': 'Yaml', + 'indexer_configuration_id': 100, + }] + + # when + actual_ctags = list(converters.ctags_to_db(input_ctag)) + + # then + self.assertEquals(actual_ctags, expected_ctags) + + @istest + def db_to_ctags(self): + input_ctags = { + 'id': b'some-id', + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + 'tool_id': 200, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {} + } + expected_ctags = { + 'id': b'some-id', + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + 'tool': { + 'id': 200, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + # when + actual_ctags = converters.db_to_ctags(input_ctags) + + # then + self.assertEquals(actual_ctags, expected_ctags) + + @istest + def db_to_mimetype(self): + input_mimetype = { + 'id': b'some-id', + 'tool_id': 10, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {}, + 'encoding': b'ascii', + 'mimetype': b'text/plain', + } + + expected_mimetype = { + 'id': b'some-id', + 'encoding': b'ascii', + 'mimetype': b'text/plain', + 'tool': { + 'id': 10, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + actual_mimetype = converters.db_to_mimetype(input_mimetype) + + self.assertEquals(actual_mimetype, expected_mimetype) + + @istest + def db_to_language(self): + input_language = { + 'id': b'some-id', + 'tool_id': 20, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {}, + 'lang': b'css', + } + + expected_language = { + 'id': b'some-id', + 'lang': b'css', + 'tool': { + 'id': 20, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + actual_language = 
converters.db_to_language(input_language) + + self.assertEquals(actual_language, expected_language) + + @istest + def db_to_fossology_license(self): + input_license = { + 'id': b'some-id', + 'tool_id': 20, + 'tool_name': 'nomossa', + 'tool_version': '5.22', + 'tool_configuration': {}, + 'licenses': ['GPL2.0'], + } + + expected_license = { + 'id': b'some-id', + 'licenses': ['GPL2.0'], + 'tool': { + 'id': 20, + 'name': 'nomossa', + 'version': '5.22', + 'configuration': {}, + } + } + + actual_license = converters.db_to_fossology_license(input_license) + + self.assertEquals(actual_license, expected_license) + + @istest + def db_to_metadata(self): + input_metadata = { + 'id': b'some-id', + 'tool_id': 20, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {}, + 'translated_metadata': b'translated_metadata', + } + + expected_metadata = { + 'id': b'some-id', + 'translated_metadata': b'translated_metadata', + 'tool': { + 'id': 20, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + actual_metadata = converters.db_to_metadata(input_metadata) + + self.assertEquals(actual_metadata, expected_metadata) diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py new file mode 100644 index 0000000..65b77c8 --- /dev/null +++ b/swh/indexer/tests/storage/test_storage.py @@ -0,0 +1,1505 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pathlib +import unittest + +from nose.tools import istest +from nose.plugins.attrib import attr +from swh.model.hashutil import hash_to_bytes + +from swh.indexer import get_indexer_storage +from swh.core.tests.db_testing import DbTestFixture + + +PATH_TO_STORAGE_TEST_DATA = '../../../../../swh-storage-testdata' + + +class StorageTestFixture: + """Mix this in a test subject class to get Storage testing support. + + This fixture requires to come before DbTestFixture in the inheritance list + as it uses its methods to setup its own internal database. + + Usage example: + + class TestStorage(StorageTestFixture, DbTestFixture): + ... 
+ """ + TEST_STORAGE_DB_NAME = 'softwareheritage-test-indexer' + + @classmethod + def setUpClass(cls): + if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'): + raise RuntimeError("StorageTestFixture needs to be followed by " + "DbTestFixture in the inheritance list.") + + test_dir = pathlib.Path(__file__).absolute().parent + test_data_dir = test_dir / PATH_TO_STORAGE_TEST_DATA + test_db_dump = (test_data_dir / 'dumps/swh-indexer.dump').absolute() + cls.add_db(cls.TEST_STORAGE_DB_NAME, str(test_db_dump), 'pg_dump') + super().setUpClass() + + def setUp(self): + super().setUp() + + self.storage_config = { + 'cls': 'local', + 'args': { + 'db': self.test_db[self.TEST_STORAGE_DB_NAME].conn, + }, + } + self.storage = get_indexer_storage(**self.storage_config) + + def tearDown(self): + super().tearDown() + + def reset_storage_tables(self): + excluded = {'indexer_configuration'} + self.reset_db_tables(self.TEST_STORAGE_DB_NAME, excluded=excluded) + + db = self.test_db[self.TEST_STORAGE_DB_NAME] + db.conn.commit() + + +@attr('db') +class BaseTestStorage(StorageTestFixture, DbTestFixture): + def setUp(self): + super().setUp() + + db = self.test_db[self.TEST_STORAGE_DB_NAME] + self.conn = db.conn + self.cursor = db.cursor + + self.sha1_1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') + self.sha1_2 = hash_to_bytes('61c2b3a30496d329e21af70dd2d7e097046d07b7') + self.revision_id_1 = hash_to_bytes( + '7026b7c1a2af56521e951c01ed20f255fa054238') + self.revision_id_2 = hash_to_bytes( + '7026b7c1a2af56521e9587659012345678904321') + + def tearDown(self): + self.reset_storage_tables() + super().tearDown() + + def fetch_tools(self): + tools = {} + self.cursor.execute(''' + select tool_name, id, tool_version, tool_configuration + from indexer_configuration + order by id''') + for row in self.cursor.fetchall(): + key = row[0] + while key in tools: + key = '_' + key + tools[key] = { + 'id': row[1], + 'name': row[0], + 'version': row[2], + 'configuration': row[3] + } + + return tools + + +@attr('db') +class CommonTestStorage(BaseTestStorage): + """Base class for Indexer Storage testing. 
+ + """ + + @istest + def check_config(self): + self.assertTrue(self.storage.check_config(check_write=True)) + self.assertTrue(self.storage.check_config(check_write=False)) + + @istest + def content_mimetype_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetypes = [ + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }] + + # when + actual_missing = self.storage.content_mimetype_missing(mimetypes) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_1, + self.sha1_2, + ]) + + # given + self.storage.content_mimetype_add([{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + }]) + + # when + actual_missing = self.storage.content_mimetype_missing(mimetypes) + + # then + self.assertEqual(list(actual_missing), [self.sha1_1]) + + @istest + def content_mimetype_add__drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetype_v1 = { + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_mimetype_add([mimetype_v1]) + + # when + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + # then + expected_mimetypes_v1 = [{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'tool': tools['file'], + }] + self.assertEqual(actual_mimetypes, expected_mimetypes_v1) + + # given + mimetype_v2 = mimetype_v1.copy() + mimetype_v2.update({ + 'mimetype': b'text/html', + 'encoding': b'us-ascii', + }) + + self.storage.content_mimetype_add([mimetype_v2]) + + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + # mimetype did not change as the v2 was dropped. 
+ self.assertEqual(actual_mimetypes, expected_mimetypes_v1) + + @istest + def content_mimetype_add__update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetype_v1 = { + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_mimetype_add([mimetype_v1]) + + # when + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + expected_mimetypes_v1 = [{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'tool': tools['file'], + }] + + # then + self.assertEqual(actual_mimetypes, expected_mimetypes_v1) + + # given + mimetype_v2 = mimetype_v1.copy() + mimetype_v2.update({ + 'mimetype': b'text/html', + 'encoding': b'us-ascii', + }) + + self.storage.content_mimetype_add([mimetype_v2], conflict_update=True) + + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + expected_mimetypes_v2 = [{ + 'id': self.sha1_2, + 'mimetype': b'text/html', + 'encoding': b'us-ascii', + 'tool': { + 'id': 2, + 'name': 'file', + 'version': '5.22', + 'configuration': {'command_line': 'file --mime '} + } + }] + + # mimetype did change as the v2 was used to overwrite v1 + self.assertEqual(actual_mimetypes, expected_mimetypes_v2) + + @istest + def content_mimetype_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetypes = [self.sha1_2, self.sha1_1] + + mimetype1 = { + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_mimetype_add([mimetype1]) + + # then + actual_mimetypes = list(self.storage.content_mimetype_get(mimetypes)) + + # then + expected_mimetypes = [{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'tool': tools['file'] + }] + + self.assertEqual(actual_mimetypes, expected_mimetypes) + + @istest + def content_language_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + languages = [ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = list(self.storage.content_language_missing(languages)) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_2, + self.sha1_1, + ]) + + # given + self.storage.content_language_add([{ + 'id': self.sha1_2, + 'lang': 'haskell', + 'indexer_configuration_id': tool_id, + }]) + + # when + actual_missing = list(self.storage.content_language_missing(languages)) + + # then + self.assertEqual(actual_missing, [self.sha1_1]) + + @istest + def content_language_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + language1 = { + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_language_add([language1]) + + # then + actual_languages = list(self.storage.content_language_get( + [self.sha1_2, self.sha1_1])) + + # then + expected_languages = [{ + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'tool': tools['pygments'] + }] + + self.assertEqual(actual_languages, expected_languages) + + @istest + def content_language_add__drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + language_v1 = { + 'id': self.sha1_2, + 'lang': 'emacslisp', + 'indexer_configuration_id': tool_id, + } + + # given + 
self.storage.content_language_add([language_v1]) + + # when + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # then + expected_languages_v1 = [{ + 'id': self.sha1_2, + 'lang': 'emacslisp', + 'tool': tools['pygments'] + }] + self.assertEqual(actual_languages, expected_languages_v1) + + # given + language_v2 = language_v1.copy() + language_v2.update({ + 'lang': 'common-lisp', + }) + + self.storage.content_language_add([language_v2]) + + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # language did not change as the v2 was dropped. + self.assertEqual(actual_languages, expected_languages_v1) + + @istest + def content_language_add__update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + language_v1 = { + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_language_add([language_v1]) + + # when + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # then + expected_languages_v1 = [{ + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'tool': tools['pygments'] + }] + self.assertEqual(actual_languages, expected_languages_v1) + + # given + language_v2 = language_v1.copy() + language_v2.update({ + 'lang': 'emacslisp', + }) + + self.storage.content_language_add([language_v2], conflict_update=True) + + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # language did not change as the v2 was dropped. + expected_languages_v2 = [{ + 'id': self.sha1_2, + 'lang': 'emacslisp', + 'tool': tools['pygments'] + }] + + # language did change as the v2 was used to overwrite v1 + self.assertEqual(actual_languages, expected_languages_v2) + + @istest + def content_ctags_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['universal-ctags']['id'] + + ctags = [ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = self.storage.content_ctags_missing(ctags) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_2, + self.sha1_1 + ]) + + # given + self.storage.content_ctags_add([ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [{ + 'name': 'done', + 'kind': 'variable', + 'line': 119, + 'lang': 'OCaml', + }] + }, + ]) + + # when + actual_missing = self.storage.content_ctags_missing(ctags) + + # then + self.assertEqual(list(actual_missing), [self.sha1_1]) + + @istest + def content_ctags_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['universal-ctags']['id'] + + ctags = [self.sha1_2, self.sha1_1] + + ctag1 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [ + { + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Python', + }, + { + 'name': 'main', + 'kind': 'function', + 'line': 119, + 'lang': 'Python', + }] + } + + # when + self.storage.content_ctags_add([ctag1]) + + # then + actual_ctags = list(self.storage.content_ctags_get(ctags)) + + # then + + expected_ctags = [ + { + 'id': self.sha1_2, + 'tool': tools['universal-ctags'], + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Python', + }, + { + 'id': self.sha1_2, + 'tool': tools['universal-ctags'], + 'name': 'main', + 'kind': 'function', + 'line': 119, + 'lang': 'Python', + } + ] + + self.assertEqual(actual_ctags, expected_ctags) + + @istest + def content_ctags_search(self): + # 1. 
given + tools = self.fetch_tools() + tool = tools['universal-ctags'] + tool_id = tool['id'] + + ctag1 = { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + 'ctags': [ + { + 'name': 'hello', + 'kind': 'function', + 'line': 133, + 'lang': 'Python', + }, + { + 'name': 'counter', + 'kind': 'variable', + 'line': 119, + 'lang': 'Python', + }, + ] + } + + ctag2 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [ + { + 'name': 'hello', + 'kind': 'variable', + 'line': 100, + 'lang': 'C', + }, + ] + } + + self.storage.content_ctags_add([ctag1, ctag2]) + + # 1. when + actual_ctags = list(self.storage.content_ctags_search('hello', + limit=1)) + + # 1. then + self.assertEqual(actual_ctags, [ + { + 'id': ctag1['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'function', + 'line': 133, + 'lang': 'Python', + } + ]) + + # 2. when + actual_ctags = list(self.storage.content_ctags_search( + 'hello', + limit=1, + last_sha1=ctag1['id'])) + + # 2. then + self.assertEqual(actual_ctags, [ + { + 'id': ctag2['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'variable', + 'line': 100, + 'lang': 'C', + } + ]) + + # 3. when + actual_ctags = list(self.storage.content_ctags_search('hello')) + + # 3. then + self.assertEqual(actual_ctags, [ + { + 'id': ctag1['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'function', + 'line': 133, + 'lang': 'Python', + }, + { + 'id': ctag2['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'variable', + 'line': 100, + 'lang': 'C', + }, + ]) + + # 4. when + actual_ctags = list(self.storage.content_ctags_search('counter')) + + # then + self.assertEqual(actual_ctags, [{ + 'id': ctag1['id'], + 'tool': tool, + 'name': 'counter', + 'kind': 'variable', + 'line': 119, + 'lang': 'Python', + }]) + + @istest + def content_ctags_search_no_result(self): + actual_ctags = list(self.storage.content_ctags_search('counter')) + + self.assertEquals(actual_ctags, []) + + @istest + def content_ctags_add__add_new_ctags_added(self): + # given + tools = self.fetch_tools() + tool = tools['universal-ctags'] + tool_id = tool['id'] + + ctag_v1 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [{ + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + }] + } + + # given + self.storage.content_ctags_add([ctag_v1]) + self.storage.content_ctags_add([ctag_v1]) # conflict does nothing + + # when + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + # then + expected_ctags = [{ + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool, + }] + + self.assertEqual(actual_ctags, expected_ctags) + + # given + ctag_v2 = ctag_v1.copy() + ctag_v2.update({ + 'ctags': [ + { + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + } + ] + }) + + self.storage.content_ctags_add([ctag_v2]) + + expected_ctags = [ + { + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool, + }, { + 'id': self.sha1_2, + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + 'tool': tool, + } + ] + + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + self.assertEqual(actual_ctags, expected_ctags) + + @istest + def content_ctags_add__update_in_place(self): + # given + tools = self.fetch_tools() + tool = tools['universal-ctags'] + tool_id = tool['id'] + + ctag_v1 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [{ + 'name': 'done', + 'kind': 'variable', 
+ 'line': 100, + 'lang': 'Scheme', + }] + } + + # given + self.storage.content_ctags_add([ctag_v1]) + + # when + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + # then + expected_ctags = [ + { + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool + } + ] + self.assertEqual(actual_ctags, expected_ctags) + + # given + ctag_v2 = ctag_v1.copy() + ctag_v2.update({ + 'ctags': [ + { + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + }, + { + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + } + ] + }) + + self.storage.content_ctags_add([ctag_v2], conflict_update=True) + + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + # ctag did change as the v2 was used to overwrite v1 + expected_ctags = [ + { + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool, + }, + { + 'id': self.sha1_2, + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + 'tool': tool, + } + ] + self.assertEqual(actual_ctags, expected_ctags) + + @istest + def content_fossology_license_get(self): + # given + tools = self.fetch_tools() + tool = tools['nomos'] + tool_id = tool['id'] + + license1 = { + 'id': self.sha1_1, + 'licenses': ['GPL-2.0+'], + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_fossology_license_add([license1]) + + # then + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_2, self.sha1_1])) + + expected_license = { + 'id': self.sha1_1, + 'licenses': ['GPL-2.0+'], + 'tool': tool, + } + + # then + self.assertEqual(actual_licenses, [expected_license]) + + @istest + def content_fossology_license_add__new_license_added(self): + # given + tools = self.fetch_tools() + tool = tools['nomos'] + tool_id = tool['id'] + + license_v1 = { + 'id': self.sha1_1, + 'licenses': ['Apache-2.0'], + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_fossology_license_add([license_v1]) + # conflict does nothing + self.storage.content_fossology_license_add([license_v1]) + + # when + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + # then + expected_license = { + 'id': self.sha1_1, + 'licenses': ['Apache-2.0'], + 'tool': tool, + } + self.assertEqual(actual_licenses, [expected_license]) + + # given + license_v2 = license_v1.copy() + license_v2.update({ + 'licenses': ['BSD-2-Clause'], + }) + + self.storage.content_fossology_license_add([license_v2]) + + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + expected_license.update({ + 'licenses': ['Apache-2.0', 'BSD-2-Clause'], + }) + + # license did not change as the v2 was dropped. 
+ self.assertEqual(actual_licenses, [expected_license]) + + @istest + def content_fossology_license_add__update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool = tools['nomos'] + tool_id = tool['id'] + + license_v1 = { + 'id': self.sha1_1, + 'licenses': ['CECILL'], + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_fossology_license_add([license_v1]) + # conflict does nothing + self.storage.content_fossology_license_add([license_v1]) + + # when + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + # then + expected_license = { + 'id': self.sha1_1, + 'licenses': ['CECILL'], + 'tool': tool, + } + self.assertEqual(actual_licenses, [expected_license]) + + # given + license_v2 = license_v1.copy() + license_v2.update({ + 'licenses': ['CECILL-2.0'] + }) + + self.storage.content_fossology_license_add([license_v2], + conflict_update=True) + + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + # license did change as the v2 was used to overwrite v1 + expected_license.update({ + 'licenses': ['CECILL-2.0'] + }) + self.assertEqual(actual_licenses, [expected_license]) + + @istest + def content_metadata_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadatas = [ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = list(self.storage.content_metadata_missing(metadatas)) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_2, + self.sha1_1, + ]) + + # given + self.storage.content_metadata_add([{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'codeRepository': { + 'type': 'git', + 'url': 'https://github.com/moranegg/metadata_test' + }, + 'description': 'Simple package.json test for indexer', + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id + }]) + + # when + actual_missing = list(self.storage.content_metadata_missing(metadatas)) + + # then + self.assertEqual(actual_missing, [self.sha1_1]) + + @istest + def content_metadata_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadata1 = { + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'codeRepository': { + 'type': 'git', + 'url': 'https://github.com/moranegg/metadata_test' + }, + 'description': 'Simple package.json test for indexer', + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_metadata_add([metadata1]) + + # then + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2, self.sha1_1])) + + expected_metadatas = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'codeRepository': { + 'type': 'git', + 'url': 'https://github.com/moranegg/metadata_test' + }, + 'description': 'Simple package.json test for indexer', + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas) + + @istest + def content_metadata_add_drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadata_v1 = { + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id, + } + + # 
given + self.storage.content_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) + + expected_metadatas_v1 = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + metadata_v2.update({ + 'translated_metadata': { + 'other': {}, + 'name': 'test_drop_duplicated_metadata', + 'version': '0.0.1' + }, + }) + + self.storage.content_metadata_add([metadata_v2]) + + # then + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) + + # metadata did not change as the v2 was dropped. + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + @istest + def content_metadata_add_update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadata_v1 = { + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) + + # then + expected_metadatas_v1 = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + metadata_v2.update({ + 'translated_metadata': { + 'other': {}, + 'name': 'test_update_duplicated_metadata', + 'version': '0.0.1' + }, + }) + self.storage.content_metadata_add([metadata_v2], conflict_update=True) + + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) + + # language did not change as the v2 was dropped. 
+ expected_metadatas_v2 = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_update_duplicated_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + + # metadata did change as the v2 was used to overwrite v1 + self.assertEqual(actual_metadatas, expected_metadatas_v2) + + @istest + def revision_metadata_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadatas = [ + { + 'id': self.revision_id_1, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.revision_id_2, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = list(self.storage.revision_metadata_missing( + metadatas)) + + # then + self.assertEqual(list(actual_missing), [ + self.revision_id_1, + self.revision_id_2, + ]) + + # given + self.storage.revision_metadata_add([{ + 'id': self.revision_id_1, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id + }]) + + # when + actual_missing = list(self.storage.revision_metadata_missing( + metadatas)) + + # then + self.assertEqual(actual_missing, [self.revision_id_2]) + + @istest + def revision_metadata_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadata_rev = { + 'id': self.revision_id_2, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id + } + + # when + self.storage.revision_metadata_add([metadata_rev]) + + # then + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_2, self.revision_id_1])) + + expected_metadatas = [{ + 'id': self.revision_id_2, + 'translated_metadata': metadata_rev['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas) + + @istest + def revision_metadata_add_drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadata_v1 = { + 'id': self.revision_id_1, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.revision_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_1])) + + expected_metadatas_v1 = [{ + 'id': self.revision_id_1, + 'translated_metadata': metadata_v1['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + 
metadata_v2.update({ + 'translated_metadata': { + 'name': 'test_metadata', + 'author': 'MG', + }, + }) + + self.storage.revision_metadata_add([metadata_v2]) + + # then + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_1])) + + # metadata did not change as the v2 was dropped. + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + @istest + def revision_metadata_add_update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadata_v1 = { + 'id': self.revision_id_2, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.revision_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_2])) + + # then + expected_metadatas_v1 = [{ + 'id': self.revision_id_2, + 'translated_metadata': metadata_v1['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + metadata_v2.update({ + 'translated_metadata': { + 'name': 'test_update_duplicated_metadata', + 'author': 'MG' + }, + }) + self.storage.revision_metadata_add([metadata_v2], conflict_update=True) + + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_2])) + + # language did not change as the v2 was dropped. + expected_metadatas_v2 = [{ + 'id': self.revision_id_2, + 'translated_metadata': metadata_v2['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + + # metadata did change as the v2 was used to overwrite v1 + self.assertEqual(actual_metadatas, expected_metadatas_v2) + + @istest + def indexer_configuration_add(self): + tool = { + 'tool_name': 'some-unknown-tool', + 'tool_version': 'some-version', + 'tool_configuration': {"debian-package": "some-package"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + self.assertIsNone(actual_tool) # does not exist + + # add it + actual_tools = list(self.storage.indexer_configuration_add([tool])) + + self.assertEquals(len(actual_tools), 1) + actual_tool = actual_tools[0] + self.assertIsNotNone(actual_tool) # now it exists + new_id = actual_tool.pop('id') + self.assertEquals(actual_tool, tool) + + actual_tools2 = list(self.storage.indexer_configuration_add([tool])) + actual_tool2 = actual_tools2[0] + self.assertIsNotNone(actual_tool2) # now it exists + new_id2 = actual_tool2.pop('id') + + self.assertEqual(new_id, new_id2) + self.assertEqual(actual_tool, actual_tool2) + + @istest + def indexer_configuration_add_multiple(self): + tool = { + 'tool_name': 'some-unknown-tool', + 'tool_version': 'some-version', + 'tool_configuration': {"debian-package": "some-package"}, + } + + actual_tools = list(self.storage.indexer_configuration_add([tool])) + self.assertEqual(len(actual_tools), 1) + + new_tools = [tool, { + 'tool_name': 'yet-another-tool', + 'tool_version': 'version', + 'tool_configuration': {}, + }] + + actual_tools = list(self.storage.indexer_configuration_add(new_tools)) + self.assertEqual(len(actual_tools), 2) + + # order not guaranteed, so we iterate over results to check + 
for tool in actual_tools: + _id = tool.pop('id') + self.assertIsNotNone(_id) + self.assertIn(tool, new_tools) + + @istest + def indexer_configuration_get_missing(self): + tool = { + 'tool_name': 'unknown-tool', + 'tool_version': '3.1.0rc2-31-ga2cbb8c', + 'tool_configuration': {"command_line": "nomossa "}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + self.assertIsNone(actual_tool) + + @istest + def indexer_configuration_get(self): + tool = { + 'tool_name': 'nomos', + 'tool_version': '3.1.0rc2-31-ga2cbb8c', + 'tool_configuration': {"command_line": "nomossa "}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + expected_tool = tool.copy() + expected_tool['id'] = 1 + + self.assertEqual(expected_tool, actual_tool) + + @istest + def indexer_configuration_metadata_get_missing_context(self): + tool = { + 'tool_name': 'swh-metadata-translator', + 'tool_version': '0.0.1', + 'tool_configuration': {"context": "unknown-context"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + self.assertIsNone(actual_tool) + + @istest + def indexer_configuration_metadata_get(self): + tool = { + 'tool_name': 'swh-metadata-translator', + 'tool_version': '0.0.1', + 'tool_configuration': {"type": "local", "context": "npm"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + expected_tool = tool.copy() + expected_tool['id'] = actual_tool['id'] + + self.assertEqual(expected_tool, actual_tool) + + +class IndexerTestStorage(CommonTestStorage, unittest.TestCase): + """Running the tests locally. + + For the client api tests (remote storage), see + `class`:swh.indexer.storage.test_api_client:TestRemoteStorage + class. + + """ + pass diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py index 96a431d..048f309 100644 --- a/swh/indexer/tests/test_language.py +++ b/swh/indexer/tests/test_language.py @@ -1,113 +1,113 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging from nose.tools import istest from swh.indexer import language from swh.indexer.language import ContentLanguageIndexer from swh.indexer.tests.test_utils import MockObjStorage -class MockStorage(): +class _MockIndexerStorage(): """Mock storage to simplify reading indexers' outputs. """ def content_language_add(self, languages, conflict_update=None): self.state = languages self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 20, }] class TestLanguageIndexer(ContentLanguageIndexer): """Specific language whose configuration is enough to satisfy the indexing tests. 
""" def prepare(self): self.config = { 'destination_queue': None, 'rescheduling_task': None, 'tools': { 'name': 'pygments', 'version': '2.0.1+dfsg-1.1+deb8u1', 'configuration': { 'type': 'library', 'debian-package': 'python3-pygments', 'max_content_size': 10240, }, } } - self.storage = MockStorage() + self.idx_storage = _MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.task_destination = None self.rescheduling_task = self.config['rescheduling_task'] self.tool_config = self.config['tools']['configuration'] self.max_content_size = self.tool_config['max_content_size'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class Language(unittest.TestCase): """ Tests pygments tool for language detection """ def setUp(self): self.maxDiff = None @istest def test_compute_language_none(self): # given self.content = "" self.declared_language = { 'lang': None } # when result = language.compute_language(self.content) # then self.assertEqual(self.declared_language, result) @istest def test_index_content_language_python(self): # given # testing python sha1s = ['02fb2c89e14f7fab46701478c83779c7beb7b069'] lang_indexer = TestLanguageIndexer() # when lang_indexer.run(sha1s, policy_update='ignore-dups') - results = lang_indexer.storage.state + results = lang_indexer.idx_storage.state expected_results = [{ 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069', 'indexer_configuration_id': 20, 'lang': 'python' }] # then self.assertEqual(expected_results, results) @istest def test_index_content_language_c(self): # given # testing c sha1s = ['103bc087db1d26afc3a0283f38663d081e9b01e6'] lang_indexer = TestLanguageIndexer() # when lang_indexer.run(sha1s, policy_update='ignore-dups') - results = lang_indexer.storage.state + results = lang_indexer.idx_storage.state expected_results = [{ 'id': '103bc087db1d26afc3a0283f38663d081e9b01e6', 'indexer_configuration_id': 20, 'lang': 'c' }] # then self.assertEqual('c', results[0]['lang']) self.assertEqual(expected_results, results) diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py index 74b8309..2953bfc 100644 --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -1,298 +1,305 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging from nose.tools import istest from swh.indexer.metadata_dictionary import compute_metadata from swh.indexer.metadata_detector import detect_metadata from swh.indexer.metadata import ContentMetadataIndexer from swh.indexer.metadata import RevisionMetadataIndexer -from swh.indexer.tests.test_utils import MockObjStorage -from swh.indexer.tests.test_utils import MockStorage +from swh.indexer.tests.test_utils import MockObjStorage, MockStorage +from swh.indexer.tests.test_utils import MockIndexerStorage class TestContentMetadataIndexer(ContentMetadataIndexer): """Specific Metadata whose configuration is enough to satisfy the indexing tests. 
""" def prepare(self): self.config.update({ 'rescheduling_task': None, }) - self.storage = MockStorage() + self.idx_storage = MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.task_destination = None self.rescheduling_task = self.config['rescheduling_task'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] self.results = [] class TestRevisionMetadataIndexer(RevisionMetadataIndexer): """Specific indexer whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'rescheduling_task': None, + 'storage': { + 'cls': 'remote', + 'args': { + 'url': 'http://localhost:9999', + } + }, 'tools': { 'name': 'swh-metadata-detector', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': 'npm' } } } self.storage = MockStorage() + self.idx_storage = MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.task_destination = None self.rescheduling_task = self.config['rescheduling_task'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] self.results = [] class Metadata(unittest.TestCase): """ Tests metadata_mock_tool tool for Metadata detection """ def setUp(self): """ shows the entire diff in the results """ self.maxDiff = None self.content_tool = { 'name': 'swh-metadata-translator', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': 'npm' } } @istest def test_compute_metadata_none(self): """ testing content empty content is empty should return None """ # given content = b"" context = "npm" # None if no metadata was found or an error occurred declared_metadata = None # when result = compute_metadata(context, content) # then self.assertEqual(declared_metadata, result) @istest def test_compute_metadata_npm(self): """ testing only computation of metadata with hard_mapping_npm """ # given content = b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """ declared_metadata = { 'name': 'test_metadata', 'version': '0.0.1', 'description': 'Simple package.json test for indexer', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'other': {} } # when result = compute_metadata("npm", content) # then self.assertEqual(declared_metadata, result) @istest def test_index_content_metadata_npm(self): """ testing NPM with package.json - one sha1 uses a file that can't be translated to metadata and should return None in the translated metadata """ # given sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5', 'd4c647f0fc257591cc9ba1722484229780d1c607', '02fb2c89e14f7fab46701478c83779c7beb7b069'] # this metadata indexer computes only metadata for package.json # in npm context with a hard mapping metadata_indexer = TestContentMetadataIndexer( tool=self.content_tool, config={}) # when metadata_indexer.run(sha1s, policy_update='ignore-dups') - results = metadata_indexer.storage.state + results = metadata_indexer.idx_storage.state expected_results = [{ 'indexer_configuration_id': 30, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' }, { 'indexer_configuration_id': 30, 'translated_metadata': { 
'softwareRequirements': { 'JSONStream': '~1.3.1', 'abbrev': '~1.1.0', 'ansi-regex': '~2.1.1', 'ansicolors': '~0.3.2', 'ansistyles': '~0.1.3' }, 'issueTracker': { 'url': 'https://github.com/npm/npm/issues' }, 'author': 'Isaac Z. Schlueter (http://blog.izs.me)', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/npm/npm' }, 'description': 'a package manager for JavaScript', 'softwareSuggestions': { 'tacks': '~1.2.6', 'tap': '~10.3.2' }, 'license': 'Artistic-2.0', 'version': '5.0.3', 'other': { 'preferGlobal': True, 'config': { 'publishtest': False } }, 'name': 'npm', 'keywords': [ 'install', 'modules', 'package manager', 'package.json' ], 'url': 'https://docs.npmjs.com/' }, 'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' }, { 'indexer_configuration_id': 30, 'translated_metadata': None, 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069' }] # The assertion bellow returns False sometimes because of nested lists self.assertEqual(expected_results, results) @istest def test_detect_metadata_package_json(self): # given df = [{ 'sha1_git': b'abc', 'name': b'index.js', 'target': b'abc', 'length': 897, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'dir_a', 'sha1': b'bcd' }, { 'sha1_git': b'aab', 'name': b'package.json', 'target': b'aab', 'length': 712, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'dir_a', 'sha1': b'cde' }] # when results = detect_metadata(df) expected_results = { 'npm': [ b'cde' ] } # then self.assertEqual(expected_results, results) @istest def test_revision_metadata_indexer(self): metadata_indexer = TestRevisionMetadataIndexer() sha1_gits = [ b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', ] metadata_indexer.run(sha1_gits, 'update-dups') - results = metadata_indexer.storage.state + results = metadata_indexer.idx_storage.state expected_results = [{ 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'translated_metadata': { 'identifier': None, 'maintainer': None, 'url': [ 'https://github.com/librariesio/yarn-parser#readme' ], 'codeRepository': [{ 'type': 'git', 'url': 'git+https://github.com/librariesio/yarn-parser.git' }], 'author': ['Andrew Nesbitt'], 'license': ['AGPL-3.0'], 'version': ['1.0.0'], 'description': [ 'Tiny web service for parsing yarn.lock files' ], 'relatedLink': None, 'developmentStatus': None, 'operatingSystem': None, 'issueTracker': [{ 'url': 'https://github.com/librariesio/yarn-parser/issues' }], 'softwareRequirements': [{ 'express': '^4.14.0', 'yarn': '^0.21.0', 'body-parser': '^1.15.2' }], 'name': ['yarn-parser'], 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], 'type': None, 'email': None }, 'indexer_configuration_id': 7 }] # then self.assertEqual(expected_results, results) diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py index a15b971..63f6044 100644 --- a/swh/indexer/tests/test_mimetype.py +++ b/swh/indexer/tests/test_mimetype.py @@ -1,158 +1,158 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging from nose.tools import istest from swh.indexer.mimetype import ContentMimetypeIndexer from swh.indexer.tests.test_utils import MockObjStorage -class _MockStorage(): +class _MockIndexerStorage(): """Mock storage to simplify reading indexers' outputs. 
""" def content_mimetype_add(self, mimetypes, conflict_update=None): self.state = mimetypes self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 10, }] class TestMimetypeIndexer(ContentMimetypeIndexer): """Specific mimetype whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'destination_queue': None, 'rescheduling_task': None, 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" }, }, } - self.storage = _MockStorage() + self.idx_storage = _MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.task_destination = None self.rescheduling_task = self.config['rescheduling_task'] self.destination_queue = self.config['destination_queue'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class TestMimetypeIndexerUnknownToolStorage(TestMimetypeIndexer): """Specific mimetype whose configuration is not enough to satisfy the indexing tests. """ def prepare(self): super().prepare() self.tools = None class TestMimetypeIndexerWithErrors(unittest.TestCase): @istest def wrong_unknown_configuration_tool(self): """Indexer with unknown configuration tool should fail the check""" with self.assertRaisesRegex(ValueError, 'Tools None is unknown'): TestMimetypeIndexerUnknownToolStorage() class TestMimetypeIndexerTest(unittest.TestCase): def setUp(self): self.indexer = TestMimetypeIndexer() @istest def test_index_no_update(self): # given sha1s = [ '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', '688a5ef812c53907562fe379d4b3851e69c7cb15', ] # when self.indexer.run(sha1s, policy_update='ignore-dups') # then expected_results = [{ 'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, { 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }] - self.assertFalse(self.indexer.storage.conflict_update) - self.assertEquals(expected_results, self.indexer.storage.state) + self.assertFalse(self.indexer.idx_storage.conflict_update) + self.assertEquals(expected_results, self.indexer.idx_storage.state) @istest def test_index_update(self): # given sha1s = [ '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', '688a5ef812c53907562fe379d4b3851e69c7cb15', 'da39a3ee5e6b4b0d3255bfef95601890afd80709', # empty content ] # when self.indexer.run(sha1s, policy_update='update-dups') # then expected_results = [{ 'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, { 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, { 'id': 'da39a3ee5e6b4b0d3255bfef95601890afd80709', 'indexer_configuration_id': 10, 'mimetype': b'application/x-empty', 'encoding': b'binary', }] - self.assertTrue(self.indexer.storage.conflict_update) - self.assertEquals(expected_results, self.indexer.storage.state) + self.assertTrue(self.indexer.idx_storage.conflict_update) + self.assertEquals(expected_results, self.indexer.idx_storage.state) @istest def test_index_one_unknown_sha1(self): # given sha1s = ['688a5ef812c53907562fe379d4b3851e69c7cb15', '799a5ef812c53907562fe379d4b3851e69c7cb15', # unknown '800a5ef812c53907562fe379d4b3851e69c7cb15'] # unknown # when self.indexer.run(sha1s, policy_update='update-dups') # 
then expected_results = [{ 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }] - self.assertTrue(self.indexer.storage.conflict_update) - self.assertEquals(expected_results, self.indexer.storage.state) + self.assertTrue(self.indexer.idx_storage.conflict_update) + self.assertEquals(expected_results, self.indexer.idx_storage.state) diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py index 3626af8..41c9068 100644 --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -1,253 +1,261 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + from swh.objstorage.exc import ObjNotFoundError class MockObjStorage: - """Mock objstorage with predefined contents. + """Mock an swh-objstorage objstorage with predefined contents. """ data = {} def __init__(self): self.data = { '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text', '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text', '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text', '02fb2c89e14f7fab46701478c83779c7beb7b069': b""" import unittest import logging from nose.tools import istest from swh.indexer.mimetype import ContentMimetypeIndexer from swh.indexer.tests.test_utils import MockObjStorage class MockStorage(): def content_mimetype_add(self, mimetypes): self.state = mimetypes self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 10, }] """, '103bc087db1d26afc3a0283f38663d081e9b01e6': b""" #ifndef __AVL__ #define __AVL__ typedef struct _avl_tree avl_tree; typedef struct _data_t { int content; } data_t; """, '93666f74f1cf635c8c8ac118879da6ec5623c410': b""" (should 'pygments (recognize 'lisp 'easily)) """, '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """, 'd4c647f0fc257591cc9ba1722484229780d1c607': b""" { "version": "5.0.3", "name": "npm", "description": "a package manager for JavaScript", "keywords": [ "install", "modules", "package manager", "package.json" ], "preferGlobal": true, "config": { "publishtest": false }, "homepage": "https://docs.npmjs.com/", "author": "Isaac Z. Schlueter (http://blog.izs.me)", "repository": { "type": "git", "url": "https://github.com/npm/npm" }, "bugs": { "url": "https://github.com/npm/npm/issues" }, "dependencies": { "JSONStream": "~1.3.1", "abbrev": "~1.1.0", "ansi-regex": "~2.1.1", "ansicolors": "~0.3.2", "ansistyles": "~0.1.3" }, "devDependencies": { "tacks": "~1.2.6", "tap": "~10.3.2" }, "license": "Artistic-2.0" } """, 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" """, 'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', } def __iter__(self): yield from self.data.keys() def __contains__(self, sha1): return self.data.get(sha1) is not None def get(self, sha1): raw_content = self.data.get(sha1) if raw_content is None: raise ObjNotFoundError(sha1) return raw_content -class MockStorage(): - """Mock storage to simplify reading indexers' outputs. 
- """ - def content_metadata_missing(self, sha1s): - yield from [] - - def content_metadata_add(self, metadata, conflict_update=None): - self.state = metadata - self.conflict_update = conflict_update - - def revision_metadata_add(self, metadata, conflict_update=None): - self.state = metadata - self.conflict_update = conflict_update +class MockIndexerStorage(): + """Mock an swh-indexer storage. + """ def indexer_configuration_add(self, tools): tool = tools[0] if tool['tool_name'] == 'swh-metadata-translator': return [{ 'id': 30, 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] elif tool['tool_name'] == 'swh-metadata-detector': return [{ 'id': 7, 'tool_name': 'swh-metadata-detector', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] + def content_metadata_missing(self, sha1s): + yield from [] + + def content_metadata_add(self, metadata, conflict_update=None): + self.state = metadata + self.conflict_update = conflict_update + + def revision_metadata_add(self, metadata, conflict_update=None): + self.state = metadata + self.conflict_update = conflict_update + + def content_metadata_get(self, sha1s): + return [{ + 'tool': { + 'configuration': { + 'type': 'local', + 'context': 'npm' + }, + 'version': '0.0.1', + 'id': 6, + 'name': 'swh-metadata-translator' + }, + 'id': b'cde', + 'translated_metadata': { + 'issueTracker': { + 'url': 'https://github.com/librariesio/yarn-parser/issues' + }, + 'version': '1.0.0', + 'name': 'yarn-parser', + 'author': 'Andrew Nesbitt', + 'url': 'https://github.com/librariesio/yarn-parser#readme', + 'processorRequirements': {'node': '7.5'}, + 'other': { + 'scripts': { + 'start': 'node index.js' + }, + 'main': 'index.js' + }, + 'license': 'AGPL-3.0', + 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], + 'codeRepository': { + 'type': 'git', + 'url': 'git+https://github.com/librariesio/yarn-parser.git' + }, + 'description': 'Tiny web service for parsing yarn.lock files', + 'softwareRequirements': { + 'yarn': '^0.21.0', + 'express': '^4.14.0', + 'body-parser': '^1.15.2'} + } + }] + + +class MockStorage(): + """Mock a real swh-storage storage to simplify reading indexers' + outputs. 
+ + """ def revision_get(self, revisions): return [{ 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'committer': { 'id': 26, 'name': b'Andrew Nesbitt', 'fullname': b'Andrew Nesbitt ', 'email': b'andrewnez@gmail.com' }, 'synthetic': False, 'date': { 'negative_utc': False, 'timestamp': { 'seconds': 1487596456, 'microseconds': 0 }, 'offset': 0 }, 'directory': b'10' }] def directory_ls(self, directory, recursive=False, cur=None): # with directory: b'\x9d', return [{ 'sha1_git': b'abc', 'name': b'index.js', 'target': b'abc', 'length': 897, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'bcd' }, { 'sha1_git': b'aab', 'name': b'package.json', 'target': b'aab', 'length': 712, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'cde' }, { 'dir_id': b'10', 'target': b'11', 'type': 'dir', 'length': None, 'name': b'.github', 'sha1': None, 'perms': 16384, 'sha1_git': None, 'status': None, 'sha256': None }] - - def content_metadata_get(self, sha1s): - return [{ - 'tool': { - 'configuration': { - 'type': 'local', - 'context': 'npm' - }, - 'version': '0.0.1', - 'id': 6, - 'name': 'swh-metadata-translator' - }, - 'id': b'cde', - 'translated_metadata': { - 'issueTracker': { - 'url': 'https://github.com/librariesio/yarn-parser/issues' - }, - 'version': '1.0.0', - 'name': 'yarn-parser', - 'author': 'Andrew Nesbitt', - 'url': 'https://github.com/librariesio/yarn-parser#readme', - 'processorRequirements': {'node': '7.5'}, - 'other': { - 'scripts': { - 'start': 'node index.js' - }, - 'main': 'index.js' - }, - 'license': 'AGPL-3.0', - 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], - 'codeRepository': { - 'type': 'git', - 'url': 'git+https://github.com/librariesio/yarn-parser.git' - }, - 'description': 'Tiny web service for parsing yarn.lock files', - 'softwareRequirements': { - 'yarn': '^0.21.0', - 'express': '^4.14.0', - 'body-parser': '^1.15.2'} - } - }] diff --git a/version.txt b/version.txt index a1e375b..61c6c3c 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.43-0-g3e4641b \ No newline at end of file +v0.0.44-0-g30a35bf \ No newline at end of file