diff --git a/swh/indexer/__init__.py b/swh/indexer/__init__.py index a5f3dfd..0ea142a 100644 --- a/swh/indexer/__init__.py +++ b/swh/indexer/__init__.py @@ -1,29 +1,57 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information INDEXER_CLASSES = { 'mimetype': 'swh.indexer.mimetype.ContentMimetypeIndexer', 'language': 'swh.indexer.language.ContentLanguageIndexer', 'ctags': 'swh.indexer.ctags.CtagsIndexer', 'fossology_license': 'swh.indexer.fossology_license.ContentFossologyLicenseIndexer', } TASK_NAMES = { 'orchestrator_all': 'swh.indexer.tasks.SWHOrchestratorAllContentsTask', 'orchestrator_text': 'swh.indexer.tasks.SWHOrchestratorTextContentsTask', 'mimetype': 'swh.indexer.tasks.SWHContentMimetypeTask', 'language': 'swh.indexer.tasks.SWHContentLanguageTask', 'ctags': 'swh.indexer.tasks.SWHCtagsTask', 'fossology_license': 'swh.indexer.tasks.SWHContentFossologyLicenseTask', 'rehash': 'swh.indexer.tasks.SWHRecomputeChecksumsTask', } __all__ = [ 'INDEXER_CLASSES', 'TASK_NAMES', ] + + +def get_storage(cls, args): + """ + Get a storage object of class `storage_class` with arguments + `storage_args`. + + Args: + storage (dict): dictionary with keys: + - cls (str): storage's class, either 'local' or 'remote' + - args (dict): dictionary with keys + + Returns: + an instance of swh.indexer's storage (either local or remote) + + Raises: + ValueError if passed an unknown storage class. + + """ + + if cls == 'remote': + from .api.client import RemoteStorage as IndexerStorage + elif cls == 'local': + from .storage import IndexerStorage + else: + raise ValueError('Unknown storage class `%s`' % cls) + + return IndexerStorage(**args) diff --git a/swh/indexer/converters.py b/swh/indexer/converters.py new file mode 100644 index 0000000..db7a295 --- /dev/null +++ b/swh/indexer/converters.py @@ -0,0 +1,140 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def ctags_to_db(ctags): + """Convert a ctags entry into a ready ctags entry. + + Args: + ctags (dict): ctags entry with the following keys: + + - id (bytes): content's identifier + - tool_id (int): tool id used to compute ctags + - ctags ([dict]): List of dictionary with the following keys: + + - name (str): symbol's name + - kind (str): symbol's kind + - line (int): symbol's line in the content + - language (str): language + + Returns: + list: list of ctags entries as dicts with the following keys: + + - id (bytes): content's identifier + - name (str): symbol's name + - kind (str): symbol's kind + - language (str): language for that content + - tool_id (int): tool id used to compute ctags + + """ + id = ctags['id'] + tool_id = ctags['indexer_configuration_id'] + for ctag in ctags['ctags']: + yield { + 'id': id, + 'name': ctag['name'], + 'kind': ctag['kind'], + 'line': ctag['line'], + 'lang': ctag['lang'], + 'indexer_configuration_id': tool_id, + } + + +def db_to_ctags(ctag): + """Convert a ctags entry into a ready ctags entry. 
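The converter above fans one indexed content out into per-symbol rows ready for a COPY into the temporary table. A minimal sketch of that flattening, with all identifiers invented:

```python
# Illustrative use of converters.ctags_to_db as defined in this patch;
# the sha1 and tool id below are made-up sample values.
from swh.indexer import converters

entry = {
    'id': b'some-id',
    'indexer_configuration_id': 100,
    'ctags': [
        {'name': 'main', 'kind': 'function', 'line': 12, 'lang': 'C'},
    ],
}

# One flat dict per symbol, keyed the way tmp_content_ctags expects.
rows = list(converters.ctags_to_db(entry))
assert rows[0] == {
    'id': b'some-id', 'name': 'main', 'kind': 'function',
    'line': 12, 'lang': 'C', 'indexer_configuration_id': 100,
}
```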
+ + Args: + ctags (dict): ctags entry with the following keys: + - id (bytes): content's identifier + - ctags ([dict]): List of dictionary with the following keys: + - name (str): symbol's name + - kind (str): symbol's kind + - line (int): symbol's line in the content + - language (str): language + + Returns: + List of ctags ready entry (dict with the following keys): + - id (bytes): content's identifier + - name (str): symbol's name + - kind (str): symbol's kind + - language (str): language for that content + - tool (dict): tool used to compute the ctags + + """ + return { + 'id': ctag['id'], + 'name': ctag['name'], + 'kind': ctag['kind'], + 'line': ctag['line'], + 'lang': ctag['lang'], + 'tool': { + 'id': ctag['tool_id'], + 'name': ctag['tool_name'], + 'version': ctag['tool_version'], + 'configuration': ctag['tool_configuration'] + } + } + + +def db_to_mimetype(mimetype): + """Convert a ctags entry into a ready ctags output. + + """ + return { + 'id': mimetype['id'], + 'encoding': mimetype['encoding'], + 'mimetype': mimetype['mimetype'], + 'tool': { + 'id': mimetype['tool_id'], + 'name': mimetype['tool_name'], + 'version': mimetype['tool_version'], + 'configuration': mimetype['tool_configuration'] + } + } + + +def db_to_language(language): + """Convert a language entry into a ready language output. + + """ + return { + 'id': language['id'], + 'lang': language['lang'], + 'tool': { + 'id': language['tool_id'], + 'name': language['tool_name'], + 'version': language['tool_version'], + 'configuration': language['tool_configuration'] + } + } + + +def db_to_metadata(metadata): + """Convert a metadata entry into a ready metadata output. + + """ + return { + 'id': metadata['id'], + 'translated_metadata': metadata['translated_metadata'], + 'tool': { + 'id': metadata['tool_id'], + 'name': metadata['tool_name'], + 'version': metadata['tool_version'], + 'configuration': metadata['tool_configuration'] + } + } + + +def db_to_fossology_license(license): + return { + 'id': license['id'], + 'licenses': license['licenses'], + 'tool': { + 'id': license['tool_id'], + 'name': license['tool_name'], + 'version': license['tool_version'], + 'configuration': license['tool_configuration'], + } + } diff --git a/swh/indexer/db.py b/swh/indexer/db.py new file mode 100644 index 0000000..b51402e --- /dev/null +++ b/swh/indexer/db.py @@ -0,0 +1,245 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.model import hashutil + +from swh.storage.db import BaseDb, stored_procedure, cursor_to_bytes +from swh.storage.db import line_to_bytes + + +class Db(BaseDb): + """Proxy to the SWH Indexer DB, with wrappers around stored procedures + + """ + @stored_procedure('swh_mktemp_bytea') + def mktemp_bytea(self, cur=None): pass + + def store_tmp_bytea(self, ids, cur=None): + """Store the given identifiers in a new tmp_bytea table""" + cur = self._cursor(cur) + + self.mktemp_bytea(cur) + self.copy_to(({'id': elem} for elem in ids), 'tmp_bytea', + ['id'], cur) + + content_mimetype_cols = [ + 'id', 'mimetype', 'encoding', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_mimetype_missing') + def mktemp_content_mimetype_missing(self, cur=None): pass + + def content_mimetype_missing_from_temp(self, cur=None): + """List missing mimetypes. 
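These wrappers are meant to be chained: create the temporary table, COPY the candidate rows into it, then call the stored procedure. A sketch of that sequence, assuming a reachable test database (the DSN and sample row are placeholders, not part of this patch):

```python
# Hedged sketch of the mktemp / copy_to / stored-procedure sequence.
from swh.indexer.db import Db

db = Db.connect('dbname=softwareheritage-indexer-test')  # placeholder DSN
with db.transaction() as cur:
    db.mktemp_content_mimetype_missing(cur)
    db.copy_to([{'id': b'some-id', 'indexer_configuration_id': 1}],
               'tmp_content_mimetype_missing',
               ['id', 'indexer_configuration_id'], cur)
    missing = [row[0] for row in
               db.content_mimetype_missing_from_temp(cur)]
```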
+ + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_mimetype_missing()") + yield from cursor_to_bytes(cur) + + @stored_procedure('swh_mktemp_content_mimetype') + def mktemp_content_mimetype(self, cur=None): pass + + def content_mimetype_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_mimetype_add(%s)", + (conflict_update, )) + + def content_mimetype_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_mimetype_get()" % ( + ','.join(self.content_mimetype_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + content_language_cols = [ + 'id', 'lang', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_language') + def mktemp_content_language(self, cur=None): pass + + @stored_procedure('swh_mktemp_content_language_missing') + def mktemp_content_language_missing(self, cur=None): pass + + def content_language_missing_from_temp(self, cur=None): + """List missing languages. + + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_language_missing()") + yield from cursor_to_bytes(cur) + + def content_language_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_language_add(%s)", + (conflict_update, )) + + def content_language_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_language_get()" % ( + ','.join(self.content_language_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + content_ctags_cols = [ + 'id', 'name', 'kind', 'line', 'lang', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_ctags') + def mktemp_content_ctags(self, cur=None): pass + + @stored_procedure('swh_mktemp_content_ctags_missing') + def mktemp_content_ctags_missing(self, cur=None): pass + + def content_ctags_missing_from_temp(self, cur=None): + """List missing ctags. + + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_ctags_missing()") + yield from cursor_to_bytes(cur) + + def content_ctags_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_ctags_add(%s)", + (conflict_update, )) + + def content_ctags_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_ctags_get()" % ( + ','.join(self.content_ctags_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + def content_ctags_search(self, expression, last_sha1, limit, cur=None): + cur = self._cursor(cur) + if not last_sha1: + query = """SELECT %s + FROM swh_content_ctags_search(%%s, %%s)""" % ( + ','.join(self.content_ctags_cols)) + cur.execute(query, (expression, limit)) + else: + if last_sha1 and isinstance(last_sha1, bytes): + last_sha1 = '\\x%s' % hashutil.hash_to_hex(last_sha1) + elif last_sha1: + last_sha1 = '\\x%s' % last_sha1 + + query = """SELECT %s + FROM swh_content_ctags_search(%%s, %%s, %%s)""" % ( + ','.join(self.content_ctags_cols)) + cur.execute(query, (expression, limit, last_sha1)) + + yield from cursor_to_bytes(cur) + + content_fossology_license_cols = [ + 'id', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration', + 'licenses'] + + @stored_procedure('swh_mktemp_content_fossology_license') + def mktemp_content_fossology_license(self, cur=None): pass + + def content_fossology_license_add_from_temp(self, conflict_update, + cur=None): + """Add new licenses per content. 
+ + """ + self._cursor(cur).execute( + "SELECT swh_content_fossology_license_add(%s)", + (conflict_update, )) + + def content_fossology_license_get_from_temp(self, cur=None): + """Retrieve licenses per content. + + """ + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_fossology_license_get()" % ( + ','.join(self.content_fossology_license_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + content_metadata_cols = [ + 'id', 'translated_metadata', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_content_metadata') + def mktemp_content_metadata(self, cur=None): pass + + @stored_procedure('swh_mktemp_content_metadata_missing') + def mktemp_content_metadata_missing(self, cur=None): pass + + def content_metadata_missing_from_temp(self, cur=None): + """List missing metadatas. + + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_content_metadata_missing()") + yield from cursor_to_bytes(cur) + + def content_metadata_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_content_metadata_add(%s)", + (conflict_update, )) + + def content_metadata_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_content_metadata_get()" % ( + ','.join(self.content_metadata_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + revision_metadata_cols = [ + 'id', 'translated_metadata', + 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] + + @stored_procedure('swh_mktemp_revision_metadata') + def mktemp_revision_metadata(self, cur=None): pass + + @stored_procedure('swh_mktemp_revision_metadata_missing') + def mktemp_revision_metadata_missing(self, cur=None): pass + + def revision_metadata_missing_from_temp(self, cur=None): + """List missing metadatas. 
+ + """ + cur = self._cursor(cur) + cur.execute("SELECT * FROM swh_revision_metadata_missing()") + yield from cursor_to_bytes(cur) + + def revision_metadata_add_from_temp(self, conflict_update, cur=None): + self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)", + (conflict_update, )) + + def revision_metadata_get_from_temp(self, cur=None): + cur = self._cursor(cur) + query = "SELECT %s FROM swh_revision_metadata_get()" % ( + ','.join(self.revision_metadata_cols)) + cur.execute(query) + yield from cursor_to_bytes(cur) + + indexer_configuration_cols = ['id', 'tool_name', 'tool_version', + 'tool_configuration'] + + @stored_procedure('swh_mktemp_indexer_configuration') + def mktemp_indexer_configuration(self, cur=None): + pass + + def indexer_configuration_add_from_temp(self, cur=None): + cur = self._cursor(cur) + cur.execute("SELECT %s from swh_indexer_configuration_add()" % ( + ','.join(self.indexer_configuration_cols), )) + yield from cursor_to_bytes(cur) + + def indexer_configuration_get(self, tool_name, + tool_version, tool_configuration, cur=None): + cur = self._cursor(cur) + cur.execute('''select %s + from indexer_configuration + where tool_name=%%s and + tool_version=%%s and + tool_configuration=%%s''' % ( + ','.join(self.indexer_configuration_cols)), + (tool_name, tool_version, tool_configuration)) + + data = cur.fetchone() + if not data: + return None + return line_to_bytes(data) diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py index 5e2ee14..56a0e54 100644 --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -1,211 +1,210 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json def convert(raw_content): """ convert raw_content recursively: - from bytes to string - from string to dict Args: raw_content (bytes / string / dict) Returns: dict: content (if string was json, otherwise returns string) """ if isinstance(raw_content, bytes): return convert(raw_content.decode()) if isinstance(raw_content, str): try: content = json.loads(raw_content) if content: return content else: return raw_content except json.decoder.JSONDecodeError: return raw_content if isinstance(raw_content, dict): return raw_content class BaseMapping(): """Base class for mappings to inherit from To implement a new mapping: - inherit this class - add a local property self.mapping - override translate function """ def translate(self, content_dict): """ Tranlsates content by parsing content to a json object and translating with the npm mapping (for now hard_coded mapping) Args: context_text (text): should be json Returns: dict: translated metadata in jsonb form needed for the indexer """ translated_metadata = {} default = 'other' translated_metadata['other'] = {} try: for k, v in content_dict.items(): try: term = self.mapping.get(k, default) if term not in translated_metadata: translated_metadata[term] = v continue if isinstance(translated_metadata[term], str): in_value = translated_metadata[term] translated_metadata[term] = [in_value, v] continue if isinstance(translated_metadata[term], list): translated_metadata[term].append(v) continue if isinstance(translated_metadata[term], dict): translated_metadata[term][k] = v continue except KeyError: self.log.exception( "Problem during item mapping") continue except: return None return 
translated_metadata class NpmMapping(BaseMapping): """ dedicated class for NPM (package.json) mapping and translation """ mapping = { 'repository': 'codeRepository', 'os': 'operatingSystem', 'cpu': 'processorRequirements', 'engines': 'processorRequirements', 'dependencies': 'softwareRequirements', 'bundleDependencies': 'softwareRequirements', 'peerDependencies': 'softwareRequirements', 'author': 'author', 'contributor': 'contributor', 'keywords': 'keywords', 'license': 'license', 'version': 'version', 'description': 'description', 'name': 'name', 'devDependencies': 'softwareSuggestions', 'optionalDependencies': 'softwareSuggestions', 'bugs': 'issueTracker', 'homepage': 'url' } def translate(self, raw_content): content_dict = convert(raw_content) return super().translate(content_dict) class MavenMapping(BaseMapping): """ dedicated class for Maven (pom.xml) mapping and translation """ mapping = { 'license': 'license', 'version': 'version', 'description': 'description', 'name': 'name', 'prerequisites': 'softwareRequirements', 'repositories': 'codeRepository', 'groupId': 'identifier', 'ciManagement': 'contIntegration', 'issuesManagement': 'issueTracker', } def translate(self, raw_content): content = convert(raw_content) # parse content from xml to dict return super().translate(content) class DoapMapping(BaseMapping): mapping = { } def translate(self, raw_content): content = convert(raw_content) # parse content from xml to dict return super().translate(content) def parse_xml(content): """ Parses content from xml to a python dict Args: - content (text): the string form of the raw_content ( in xml) Returns: - parsed_xml (dict): a python dict of the content after parsing """ # check if xml # use xml parser to dict return content mapping_tool_fn = { "npm": NpmMapping(), "maven": MavenMapping(), "doap_xml": DoapMapping() } def compute_metadata(context, raw_content): """ first landing method: a dispatcher that sends content to the right function to carry out the real parsing of syntax and translation of terms Args: context (text): defines to which function/tool the content is sent content (text): the string form of the raw_content Returns: dict: translated metadata jsonb dictionary needed for the indexer to store in storage """ if raw_content is None or raw_content is b"": return None # TODO: keep mapping not in code (maybe fetch crosswalk from storage?) 
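mapping_tool_fn is the whole dispatch table: compute_metadata() only looks the context up there and defers to that mapping's translate(). Mirroring the main() further down (the package.json payload is a sample; the comment shows the expected shape, not a guaranteed exact value):

```python
# Dispatch through mapping_tool_fn, as exercised by main() below.
from swh.indexer.metadata_dictionary import compute_metadata

raw = b'{"name": "test_name", "bugs": "https://example.com/issues"}'
result = compute_metadata('npm', raw)
# expected shape: {'other': {}, 'name': 'test_name',
#                  'issueTracker': 'https://example.com/issues'}
```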
# if fetched from storage should be done once for batch of sha1s dictionary = mapping_tool_fn[context] translated_metadata = dictionary.translate(raw_content) - # print(translated_metadata) return translated_metadata def main(): raw_content = """{"name": "test_name", "unknown_term": "ut"}""" raw_content1 = b"""{"name": "test_name", "unknown_term": "ut", "prerequisites" :"packageXYZ"}""" result = compute_metadata("npm", raw_content) result1 = compute_metadata("maven", raw_content1) print(result) print(result1) if __name__ == "__main__": main() diff --git a/swh/indexer/storage.py b/swh/indexer/storage.py new file mode 100644 index 0000000..8325954 --- /dev/null +++ b/swh/indexer/storage.py @@ -0,0 +1,521 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +import json +import dateutil.parser +import psycopg2 + +from swh.storage.common import db_transaction_generator, db_transaction +from swh.storage.exc import StorageDBError +from .db import Db + +from . import converters + + +class IndexerStorage(): + """SWH Indexer Storage + + """ + def __init__(self, db): + """ + Args: + db_conn: either a libpq connection string, or a psycopg2 connection + obj_root: path to the root of the object storage + + """ + try: + if isinstance(db, psycopg2.extensions.connection): + self.db = Db(db) + else: + self.db = Db.connect(db) + except psycopg2.OperationalError as e: + raise StorageDBError(e) + + def check_config(self, *, check_write): + """Check that the storage is configured and ready to go.""" + # Check permissions on one of the tables + with self.db.transaction() as cur: + if check_write: + check = 'INSERT' + else: + check = 'SELECT' + + cur.execute( + "select has_table_privilege(current_user, 'content_mimetype', %s)", # noqa + (check,) + ) + return cur.fetchone()[0] + + return True + + @db_transaction_generator + def content_mimetype_missing(self, mimetypes, cur=None): + """List mimetypes missing from storage. + + Args: + mimetypes (iterable): iterable of dict with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: an iterable of missing id for the triplets id, tool_name, + tool_version + + """ + db = self.db + db.mktemp_content_mimetype_missing(cur) + db.copy_to(mimetypes, 'tmp_content_mimetype_missing', + ['id', 'indexer_configuration_id'], + cur) + for obj in db.content_mimetype_missing_from_temp(cur): + yield obj[0] + + @db_transaction + def content_mimetype_add(self, mimetypes, conflict_update=False, cur=None): + """Add mimetypes not present in storage. 
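Together with get_storage() from __init__.py, the missing/add pair supports the usual indexer loop: filter out what is already indexed, compute, then add. A sketch, where the DSN and tool id are placeholders:

```python
# Typical missing -> add cycle against IndexerStorage; the 'db' value and
# the tool id are placeholders for a real configuration.
from swh.indexer import get_storage

storage = get_storage(cls='local',
                      args={'db': 'dbname=softwareheritage-indexer-dev'})

candidates = [{'id': b'some-id', 'indexer_configuration_id': 1}]
to_index = list(storage.content_mimetype_missing(candidates))
storage.content_mimetype_add([
    {'id': sha1, 'mimetype': b'text/plain', 'encoding': b'utf-8',
     'indexer_configuration_id': 1}
    for sha1 in to_index
])
```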
+ + Args: + mimetypes (iterable): dictionaries with keys: + + - id (bytes): sha1 identifier + - mimetype (bytes): raw content's mimetype + - encoding (bytes): raw content's encoding + - indexer_configuration_id (int): tool's id used to + compute the results + - conflict_update: Flag to determine if we want to + overwrite (true) or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_content_mimetype(cur) + db.copy_to(mimetypes, 'tmp_content_mimetype', + ['id', 'mimetype', 'encoding', 'indexer_configuration_id'], + cur) + db.content_mimetype_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_mimetype_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_mimetype_get_from_temp(): + yield converters.db_to_mimetype( + dict(zip(db.content_mimetype_cols, c))) + + @db_transaction_generator + def content_language_missing(self, languages, cur=None): + """List languages missing from storage. + + Args: + languages (iterable): dictionaries with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: identifiers of missing languages + + """ + db = self.db + db.mktemp_content_language_missing(cur) + db.copy_to(languages, 'tmp_content_language_missing', + ['id', 'indexer_configuration_id'], cur) + for obj in db.content_language_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def content_language_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_language_get_from_temp(): + yield converters.db_to_language( + dict(zip(db.content_language_cols, c))) + + @db_transaction + def content_language_add(self, languages, conflict_update=False, cur=None): + """Add languages not present in storage. + + Args: + languages (iterable): dictionaries with keys: + + - id: sha1 + - lang: bytes + + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_content_language(cur) + # empty language is mapped to 'unknown' + db.copy_to( + ({ + 'id': l['id'], + 'lang': 'unknown' if not l['lang'] else l['lang'], + 'indexer_configuration_id': l['indexer_configuration_id'], + } for l in languages), + 'tmp_content_language', + ['id', 'lang', 'indexer_configuration_id'], cur) + + db.content_language_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_ctags_missing(self, ctags, cur=None): + """List ctags missing from storage. + + Args: + ctags (iterable): dicts with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool name used + - tool_version (str): associated version + + Returns: + an iterable of missing id + + """ + db = self.db + + db.mktemp_content_ctags_missing(cur) + db.copy_to(ctags, + tblname='tmp_content_ctags_missing', + columns=['id', 'indexer_configuration_id'], + cur=cur) + for obj in db.content_ctags_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def content_ctags_get(self, ids, cur=None): + """Retrieve ctags per id. 
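Note that content_language_add quietly rewrites falsy languages to 'unknown' before the COPY. The equivalent expansion on its own, with invented rows:

```python
# The normalization content_language_add applies before copy_to().
languages = [
    {'id': b'id-a', 'lang': None, 'indexer_configuration_id': 4},
    {'id': b'id-b', 'lang': 'haskell', 'indexer_configuration_id': 4},
]
normalized = [{
    'id': l['id'],
    'lang': 'unknown' if not l['lang'] else l['lang'],
    'indexer_configuration_id': l['indexer_configuration_id'],
} for l in languages]
# -> 'unknown' for id-a, 'haskell' kept for id-b
```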
+ + Args: + ids (iterable): sha1 checksums + + """ + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_ctags_get_from_temp(): + yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c))) + + @db_transaction + def content_ctags_add(self, ctags, conflict_update=False, cur=None): + """Add ctags not present in storage + + Args: + ctags (iterable): dictionaries with keys: + + - id (bytes): sha1 + - ctags ([list): List of dictionary with keys: name, kind, + line, language + + """ + db = self.db + + def _convert_ctags(__ctags): + """Convert ctags dict to list of ctags. + + """ + for ctags in __ctags: + yield from converters.ctags_to_db(ctags) + + db.mktemp_content_ctags(cur) + db.copy_to(list(_convert_ctags(ctags)), + tblname='tmp_content_ctags', + columns=['id', 'name', 'kind', 'line', + 'lang', 'indexer_configuration_id'], + cur=cur) + + db.content_ctags_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_ctags_search(self, expression, + limit=10, last_sha1=None, cur=None): + """Search through content's raw ctags symbols. + + Args: + expression (str): Expression to search for + limit (int): Number of rows to return (default to 10). + last_sha1 (str): Offset from which retrieving data (default to ''). + + Yields: + rows of ctags including id, name, lang, kind, line, etc... + + """ + db = self.db + + for obj in db.content_ctags_search(expression, last_sha1, limit, + cur=cur): + yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj))) + + @db_transaction_generator + def content_fossology_license_get(self, ids, cur=None): + """Retrieve licenses per id. + + Args: + ids (iterable): sha1 checksums + + Yields: + list: dictionaries with the following keys: + + - id (bytes) + - licenses ([str]): associated licenses for that content + + """ + db = self.db + db.store_tmp_bytea(ids, cur) + + for c in db.content_fossology_license_get_from_temp(): + license = dict(zip(db.content_fossology_license_cols, c)) + yield converters.db_to_fossology_license(license) + + @db_transaction + def content_fossology_license_add(self, licenses, + conflict_update=False, cur=None): + """Add licenses not present in storage. + + Args: + licenses (iterable): dictionaries with keys: + + - id: sha1 + - license ([bytes]): List of licenses associated to sha1 + - tool (str): nomossa + + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + Returns: + list: content_license entries which failed due to unknown licenses + + """ + db = self.db + + # Then, we add the correct ones + db.mktemp_content_fossology_license(cur) + db.copy_to( + ({ + 'id': sha1['id'], + 'indexer_configuration_id': sha1['indexer_configuration_id'], + 'license': license, + } for sha1 in licenses + for license in sha1['licenses']), + tblname='tmp_content_fossology_license', + columns=['id', 'license', 'indexer_configuration_id'], + cur=cur) + db.content_fossology_license_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def content_metadata_missing(self, metadatas, cur=None): + """List metadatas missing from storage. 
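content_fossology_license_add stores one row per (content, license) pair, so a multi-license input is fanned out before the COPY. The same expansion in isolation, on sample input:

```python
# The (id, license) fan-out performed inside
# content_fossology_license_add; sample input only.
licenses = [{'id': b'some-id',
             'licenses': ['GPL-2.0+', 'Apache-2.0'],
             'indexer_configuration_id': 2}]
rows = [{'id': e['id'],
         'license': license,
         'indexer_configuration_id': e['indexer_configuration_id']}
        for e in licenses
        for license in e['licenses']]
assert len(rows) == 2  # one row per license for the same sha1
```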
+ + Args: + metadatas (iterable): dictionaries with keys: + + - id (bytes): sha1 identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: missing ids + + """ + db = self.db + db.mktemp_content_metadata_missing(cur) + db.copy_to(metadatas, 'tmp_content_metadata_missing', + ['id', 'indexer_configuration_id'], cur) + for obj in db.content_metadata_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def content_metadata_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.content_metadata_get_from_temp(): + yield converters.db_to_metadata( + dict(zip(db.content_metadata_cols, c))) + + @db_transaction + def content_metadata_add(self, metadatas, conflict_update=False, cur=None): + """Add metadatas not present in storage. + + Args: + metadatas (iterable): dictionaries with keys: + + - id: sha1 + - translated_metadata: bytes / jsonb ? + + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_content_metadata(cur) + # empty metadata is mapped to 'unknown' + + db.copy_to(metadatas, 'tmp_content_metadata', + ['id', 'translated_metadata', 'indexer_configuration_id'], + cur) + db.content_metadata_add_from_temp(conflict_update, cur) + + @db_transaction_generator + def revision_metadata_missing(self, metadatas, cur=None): + """List metadatas missing from storage. + + Args: + metadatas (iterable): dictionaries with keys: + + - id (bytes): sha1_git revision identifier + - tool_name (str): tool used to compute the results + - tool_version (str): associated tool's version + + Returns: + iterable: missing ids + + """ + db = self.db + db.mktemp_revision_metadata_missing(cur) + db.copy_to(metadatas, 'tmp_revision_metadata_missing', + ['id', 'indexer_configuration_id'], cur) + for obj in db.revision_metadata_missing_from_temp(cur): + yield obj[0] + + @db_transaction_generator + def revision_metadata_get(self, ids, cur=None): + db = self.db + db.store_tmp_bytea(ids, cur) + for c in db.revision_metadata_get_from_temp(): + yield converters.db_to_metadata( + dict(zip(db.revision_metadata_cols, c))) + + @db_transaction + def revision_metadata_add(self, metadatas, + conflict_update=False, cur=None): + """Add metadatas not present in storage. + + Args: + metadatas (iterable): dictionaries with keys: + + - id: sha1_git of revision + - translated_metadata: bytes / jsonb ? + + conflict_update: Flag to determine if we want to overwrite (true) + or skip duplicates (false, the default) + + """ + db = self.db + db.mktemp_revision_metadata(cur) + # empty metadata is mapped to 'unknown' + + db.copy_to(metadatas, 'tmp_revision_metadata', + ['id', 'translated_metadata', 'indexer_configuration_id'], + cur) + db.revision_metadata_add_from_temp(conflict_update, cur) + + @db_transaction + def origin_metadata_add(self, origin_id, ts, provider, tool, metadata, + cur=None): + """ Add an origin_metadata for the origin at ts with provenance and + metadata. 
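The revision-level API mirrors the content-level one, only keyed by sha1_git. A short add/get cycle; the revision id reuses a value from the test fixtures below, while the storage setup and tool id are placeholders:

```python
# Add/get cycle for revision metadata (placeholder setup).
from swh.indexer import get_storage

storage = get_storage(cls='local',
                      args={'db': 'dbname=softwareheritage-indexer-dev'})
rev_id = bytes.fromhex('7026b7c1a2af56521e951c01ed20f255fa054238')

storage.revision_metadata_add([{
    'id': rev_id,
    'translated_metadata': {'name': 'test_metadata', 'version': '0.0.1'},
    'indexer_configuration_id': 7,  # hypothetical tool id
}])
found = list(storage.revision_metadata_get([rev_id]))
```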
+ + Args: + origin_id (int): the origin's id for which the metadata is added + ts (datetime): timestamp of the found metadata + provider (int): the provider of metadata (ex:'hal') + tool (int): tool used to extract metadata + metadata (jsonb): the metadata retrieved at the time and location + + Returns: + id (int): the origin_metadata unique id + """ + if isinstance(ts, str): + ts = dateutil.parser.parse(ts) + + return self.db.origin_metadata_add(origin_id, ts, provider, tool, + metadata, cur) + + @db_transaction_generator + def origin_metadata_get_by(self, origin_id, provider_type=None, cur=None): + """Retrieve list of all origin_metadata entries for the origin_id + + Args: + origin_id (int): the unique origin identifier + provider_type (str): (optional) type of provider + + Returns: + list of dicts: the origin_metadata dictionary with the keys: + + - id (int): origin_metadata's id + - origin_id (int): origin's id + - discovery_date (datetime): timestamp of discovery + - tool_id (int): metadata's extracting tool + - metadata (jsonb) + - provider_id (int): metadata's provider + - provider_name (str) + - provider_type (str) + - provider_url (str) + + """ + db = self.db + for line in db.origin_metadata_get_by(origin_id, provider_type, cur): + yield dict(zip(db.origin_metadata_get_cols, line)) + + @db_transaction_generator + def indexer_configuration_add(self, tools, cur=None): + """Add new tools to the storage. + + Args: + tools ([dict]): List of dictionary representing tool to + insert in the db. Dictionary with the following keys:: + + tool_name (str): tool's name + tool_version (str): tool's version + tool_configuration (dict): tool's configuration (free form + dict) + + Returns: + List of dict inserted in the db (holding the id key as + well). The order of the list is not guaranteed to match + the order of the initial list. + + """ + db = self.db + db.mktemp_indexer_configuration(cur) + db.copy_to(tools, 'tmp_indexer_configuration', + ['tool_name', 'tool_version', 'tool_configuration'], + cur) + + tools = db.indexer_configuration_add_from_temp(cur) + for line in tools: + yield dict(zip(db.indexer_configuration_cols, line)) + + @db_transaction + def indexer_configuration_get(self, tool, cur=None): + """Retrieve tool information. + + Args: + tool (dict): Dictionary representing a tool with the + following keys:: + + tool_name (str): tool's name + tool_version (str): tool's version + tool_configuration (dict): tool's configuration (free form + dict) + + Returns: + The identifier of the tool if it exists, None otherwise. 
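indexer_configuration_add yields the inserted rows with their ids, and indexer_configuration_get finds them again from the same three fields (the configuration dict is serialized to JSON for the lookup). A round trip, with an illustrative tool description:

```python
# Register-then-lookup round trip; the tool description is illustrative
# and `storage` is an IndexerStorage as sketched earlier.
tool = {
    'tool_name': 'nomos',
    'tool_version': '3.1.0rc2-31-ga2cbb8c',
    'tool_configuration': {'command_line': 'nomossa <filepath>'},
}
registered = list(storage.indexer_configuration_add([tool]))
found = storage.indexer_configuration_get(tool)
assert found is not None and found['id'] == registered[0]['id']
```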
+ + """ + db = self.db + tool_conf = tool['tool_configuration'] + if isinstance(tool_conf, dict): + tool_conf = json.dumps(tool_conf) + idx = db.indexer_configuration_get(tool['tool_name'], + tool['tool_version'], + tool_conf) + if not idx: + return None + return dict(zip(self.db.indexer_configuration_cols, idx)) diff --git a/swh/indexer/tests/__init__.py b/swh/indexer/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/indexer/tests/common.py b/swh/indexer/tests/common.py new file mode 100644 index 0000000..e397b48 --- /dev/null +++ b/swh/indexer/tests/common.py @@ -0,0 +1,56 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pathlib + +from swh.indexer import get_storage + + +class StorageTestFixture: + """Mix this in a test subject class to get Storage testing support. + + This fixture requires to come before DbTestFixture in the inheritance list + as it uses its methods to setup its own internal database. + + Usage example: + + class TestStorage(StorageTestFixture, DbTestFixture): + ... + """ + TEST_STORAGE_DB_NAME = 'softwareheritage-test-indexer' + + @classmethod + def setUpClass(cls): + if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'): + raise RuntimeError("StorageTestFixture needs to be followed by " + "DbTestFixture in the inheritance list.") + + test_dir = pathlib.Path(__file__).absolute().parent + test_data_dir = test_dir / '../../../../swh-storage-testdata' + test_db_dump = (test_data_dir / 'dumps/swh.dump').absolute() + cls.add_db(cls.TEST_STORAGE_DB_NAME, str(test_db_dump), 'pg_dump') + super().setUpClass() + + def setUp(self): + super().setUp() + + self.storage_config = { + 'cls': 'local', + 'args': { + 'db': self.test_db[self.TEST_STORAGE_DB_NAME].conn, + }, + } + self.storage = get_storage(**self.storage_config) + + def tearDown(self): + self.objtmp.cleanup() + super().tearDown() + + def reset_storage_tables(self): + excluded = {'indexer_configuration'} + self.reset_db_tables(self.TEST_STORAGE_DB_NAME, excluded=excluded) + + db = self.test_db[self.TEST_STORAGE_DB_NAME] + db.conn.commit() diff --git a/swh/indexer/tests/test_converters.py b/swh/indexer/tests/test_converters.py new file mode 100644 index 0000000..61410eb --- /dev/null +++ b/swh/indexer/tests/test_converters.py @@ -0,0 +1,199 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from nose.tools import istest +from nose.plugins.attrib import attr + +from swh.indexer import converters + + +@attr('!db') +class TestConverters(unittest.TestCase): + def setUp(self): + self.maxDiff = None + + @istest + def ctags_to_db(self): + input_ctag = { + 'id': b'some-id', + 'indexer_configuration_id': 100, + 'ctags': [ + { + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + }, { + 'name': 'main', + 'kind': 'function', + 'line': 12, + 'lang': 'Yaml', + }, + ] + } + + expected_ctags = [ + { + 'id': b'some-id', + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + 'indexer_configuration_id': 100, + }, { + 'id': b'some-id', + 'name': 'main', + 'kind': 'function', + 'line': 12, + 'lang': 'Yaml', + 
'indexer_configuration_id': 100, + }] + + # when + actual_ctags = list(converters.ctags_to_db(input_ctag)) + + # then + self.assertEquals(actual_ctags, expected_ctags) + + @istest + def db_to_ctags(self): + input_ctags = { + 'id': b'some-id', + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + 'tool_id': 200, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {} + } + expected_ctags = { + 'id': b'some-id', + 'name': 'some-name', + 'kind': 'some-kind', + 'line': 10, + 'lang': 'Yaml', + 'tool': { + 'id': 200, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + # when + actual_ctags = converters.db_to_ctags(input_ctags) + + # then + self.assertEquals(actual_ctags, expected_ctags) + + @istest + def db_to_mimetype(self): + input_mimetype = { + 'id': b'some-id', + 'tool_id': 10, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {}, + 'encoding': b'ascii', + 'mimetype': b'text/plain', + } + + expected_mimetype = { + 'id': b'some-id', + 'encoding': b'ascii', + 'mimetype': b'text/plain', + 'tool': { + 'id': 10, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + actual_mimetype = converters.db_to_mimetype(input_mimetype) + + self.assertEquals(actual_mimetype, expected_mimetype) + + @istest + def db_to_language(self): + input_language = { + 'id': b'some-id', + 'tool_id': 20, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {}, + 'lang': b'css', + } + + expected_language = { + 'id': b'some-id', + 'lang': b'css', + 'tool': { + 'id': 20, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + actual_language = converters.db_to_language(input_language) + + self.assertEquals(actual_language, expected_language) + + @istest + def db_to_fossology_license(self): + input_license = { + 'id': b'some-id', + 'tool_id': 20, + 'tool_name': 'nomossa', + 'tool_version': '5.22', + 'tool_configuration': {}, + 'licenses': ['GPL2.0'], + } + + expected_license = { + 'id': b'some-id', + 'licenses': ['GPL2.0'], + 'tool': { + 'id': 20, + 'name': 'nomossa', + 'version': '5.22', + 'configuration': {}, + } + } + + actual_license = converters.db_to_fossology_license(input_license) + + self.assertEquals(actual_license, expected_license) + + @istest + def db_to_metadata(self): + input_metadata = { + 'id': b'some-id', + 'tool_id': 20, + 'tool_name': 'some-toolname', + 'tool_version': 'some-toolversion', + 'tool_configuration': {}, + 'translated_metadata': b'translated_metadata', + } + + expected_metadata = { + 'id': b'some-id', + 'translated_metadata': b'translated_metadata', + 'tool': { + 'id': 20, + 'name': 'some-toolname', + 'version': 'some-toolversion', + 'configuration': {}, + } + } + + actual_metadata = converters.db_to_metadata(input_metadata) + + self.assertEquals(actual_metadata, expected_metadata) diff --git a/swh/indexer/tests/test_storage.py b/swh/indexer/tests/test_storage.py new file mode 100644 index 0000000..37172ff --- /dev/null +++ b/swh/indexer/tests/test_storage.py @@ -0,0 +1,1439 @@ +# Copyright (C) 2015-2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from nose.tools import istest +from nose.plugins.attrib import attr +from 
swh.model.hashutil import hash_to_bytes + +from swh.core.tests.db_testing import DbTestFixture +from .test_utils import StorageTestFixture + + +@attr('db') +class IndexerTestStorage(StorageTestFixture, DbTestFixture, unittest.TestCase): + """Base class for Indexer Storage testing. + + """ + def setUp(self): + super().setUp() + + db = self.test_db[self.TEST_STORAGE_DB_NAME] + self.conn = db.conn + self.cursor = db.cursor + + self.sha1_1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') + self.sha1_2 = hash_to_bytes('61c2b3a30496d329e21af70dd2d7e097046d07b7') + self.revision_id_1 = hash_to_bytes( + '7026b7c1a2af56521e951c01ed20f255fa054238') + self.revision_id_2 = hash_to_bytes( + '7026b7c1a2af56521e9587659012345678904321') + + def tearDown(self): + self.reset_storage_tables() + super().tearDown() + + def fetch_tools(self): + tools = {} + self.cursor.execute(''' + select tool_name, id, tool_version, tool_configuration + from indexer_configuration + order by id''') + for row in self.cursor.fetchall(): + key = row[0] + while key in tools: + key = '_' + key + tools[key] = { + 'id': row[1], + 'name': row[0], + 'version': row[2], + 'configuration': row[3] + } + + return tools + + @istest + def check_config(self): + self.assertTrue(self.storage.check_config(check_write=True)) + self.assertTrue(self.storage.check_config(check_write=False)) + + @istest + def content_mimetype_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetypes = [ + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }] + + # when + actual_missing = self.storage.content_mimetype_missing(mimetypes) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_1, + self.sha1_2, + ]) + + # given + self.storage.content_mimetype_add([{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + }]) + + # when + actual_missing = self.storage.content_mimetype_missing(mimetypes) + + # then + self.assertEqual(list(actual_missing), [self.sha1_1]) + + @istest + def content_mimetype_add__drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetype_v1 = { + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_mimetype_add([mimetype_v1]) + + # when + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + # then + expected_mimetypes_v1 = [{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'tool': tools['file'], + }] + self.assertEqual(actual_mimetypes, expected_mimetypes_v1) + + # given + mimetype_v2 = mimetype_v1.copy() + mimetype_v2.update({ + 'mimetype': b'text/html', + 'encoding': b'us-ascii', + }) + + self.storage.content_mimetype_add([mimetype_v2]) + + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + # mimetype did not change as the v2 was dropped. 
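The drop-duplicate and update-in-place tests in this file pin down the conflict_update contract; condensed, with the fixture sha1 and tool id reused and `storage` standing for the fixture's IndexerStorage:

```python
# conflict_update contract exercised by this pair of tests.
sha1 = bytes.fromhex('61c2b3a30496d329e21af70dd2d7e097046d07b7')
v1 = {'id': sha1, 'mimetype': b'text/plain', 'encoding': b'utf-8',
      'indexer_configuration_id': 2}
v2 = dict(v1, mimetype=b'text/html', encoding=b'us-ascii')

storage.content_mimetype_add([v1])
storage.content_mimetype_add([v2])                        # duplicate dropped
storage.content_mimetype_add([v2], conflict_update=True)  # v1 overwritten
```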
+ self.assertEqual(actual_mimetypes, expected_mimetypes_v1) + + @istest + def content_mimetype_add__update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetype_v1 = { + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_mimetype_add([mimetype_v1]) + + # when + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + expected_mimetypes_v1 = [{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'tool': tools['file'], + }] + + # then + self.assertEqual(actual_mimetypes, expected_mimetypes_v1) + + # given + mimetype_v2 = mimetype_v1.copy() + mimetype_v2.update({ + 'mimetype': b'text/html', + 'encoding': b'us-ascii', + }) + + self.storage.content_mimetype_add([mimetype_v2], conflict_update=True) + + actual_mimetypes = list(self.storage.content_mimetype_get( + [self.sha1_2])) + + expected_mimetypes_v2 = [{ + 'id': self.sha1_2, + 'mimetype': b'text/html', + 'encoding': b'us-ascii', + 'tool': { + 'id': 2, + 'name': 'file', + 'version': '5.22', + 'configuration': {'command_line': 'file --mime '} + } + }] + + # mimetype did change as the v2 was used to overwrite v1 + self.assertEqual(actual_mimetypes, expected_mimetypes_v2) + + @istest + def content_mimetype_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['file']['id'] + + mimetypes = [self.sha1_2, self.sha1_1] + + mimetype1 = { + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_mimetype_add([mimetype1]) + + # then + actual_mimetypes = list(self.storage.content_mimetype_get(mimetypes)) + + # then + expected_mimetypes = [{ + 'id': self.sha1_2, + 'mimetype': b'text/plain', + 'encoding': b'utf-8', + 'tool': tools['file'] + }] + + self.assertEqual(actual_mimetypes, expected_mimetypes) + + @istest + def content_language_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + languages = [ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = list(self.storage.content_language_missing(languages)) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_2, + self.sha1_1, + ]) + + # given + self.storage.content_language_add([{ + 'id': self.sha1_2, + 'lang': 'haskell', + 'indexer_configuration_id': tool_id, + }]) + + # when + actual_missing = list(self.storage.content_language_missing(languages)) + + # then + self.assertEqual(actual_missing, [self.sha1_1]) + + @istest + def content_language_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + language1 = { + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_language_add([language1]) + + # then + actual_languages = list(self.storage.content_language_get( + [self.sha1_2, self.sha1_1])) + + # then + expected_languages = [{ + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'tool': tools['pygments'] + }] + + self.assertEqual(actual_languages, expected_languages) + + @istest + def content_language_add__drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + language_v1 = { + 'id': self.sha1_2, + 'lang': 'emacslisp', + 'indexer_configuration_id': tool_id, + } + + # given + 
self.storage.content_language_add([language_v1]) + + # when + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # then + expected_languages_v1 = [{ + 'id': self.sha1_2, + 'lang': 'emacslisp', + 'tool': tools['pygments'] + }] + self.assertEqual(actual_languages, expected_languages_v1) + + # given + language_v2 = language_v1.copy() + language_v2.update({ + 'lang': 'common-lisp', + }) + + self.storage.content_language_add([language_v2]) + + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # language did not change as the v2 was dropped. + self.assertEqual(actual_languages, expected_languages_v1) + + @istest + def content_language_add__update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['pygments']['id'] + + language_v1 = { + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_language_add([language_v1]) + + # when + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # then + expected_languages_v1 = [{ + 'id': self.sha1_2, + 'lang': 'common-lisp', + 'tool': tools['pygments'] + }] + self.assertEqual(actual_languages, expected_languages_v1) + + # given + language_v2 = language_v1.copy() + language_v2.update({ + 'lang': 'emacslisp', + }) + + self.storage.content_language_add([language_v2], conflict_update=True) + + actual_languages = list(self.storage.content_language_get( + [self.sha1_2])) + + # language did not change as the v2 was dropped. + expected_languages_v2 = [{ + 'id': self.sha1_2, + 'lang': 'emacslisp', + 'tool': tools['pygments'] + }] + + # language did change as the v2 was used to overwrite v1 + self.assertEqual(actual_languages, expected_languages_v2) + + @istest + def content_ctags_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['universal-ctags']['id'] + + ctags = [ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = self.storage.content_ctags_missing(ctags) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_2, + self.sha1_1 + ]) + + # given + self.storage.content_ctags_add([ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [{ + 'name': 'done', + 'kind': 'variable', + 'line': 119, + 'lang': 'OCaml', + }] + }, + ]) + + # when + actual_missing = self.storage.content_ctags_missing(ctags) + + # then + self.assertEqual(list(actual_missing), [self.sha1_1]) + + @istest + def content_ctags_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['universal-ctags']['id'] + + ctags = [self.sha1_2, self.sha1_1] + + ctag1 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [ + { + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Python', + }, + { + 'name': 'main', + 'kind': 'function', + 'line': 119, + 'lang': 'Python', + }] + } + + # when + self.storage.content_ctags_add([ctag1]) + + # then + actual_ctags = list(self.storage.content_ctags_get(ctags)) + + # then + + expected_ctags = [ + { + 'id': self.sha1_2, + 'tool': tools['universal-ctags'], + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Python', + }, + { + 'id': self.sha1_2, + 'tool': tools['universal-ctags'], + 'name': 'main', + 'kind': 'function', + 'line': 119, + 'lang': 'Python', + } + ] + + self.assertEqual(actual_ctags, expected_ctags) + + @istest + def content_ctags_search(self): + # 1. 
given + tools = self.fetch_tools() + tool = tools['universal-ctags'] + tool_id = tool['id'] + + ctag1 = { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + 'ctags': [ + { + 'name': 'hello', + 'kind': 'function', + 'line': 133, + 'lang': 'Python', + }, + { + 'name': 'counter', + 'kind': 'variable', + 'line': 119, + 'lang': 'Python', + }, + ] + } + + ctag2 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [ + { + 'name': 'hello', + 'kind': 'variable', + 'line': 100, + 'lang': 'C', + }, + ] + } + + self.storage.content_ctags_add([ctag1, ctag2]) + + # 1. when + actual_ctags = list(self.storage.content_ctags_search('hello', + limit=1)) + + # 1. then + self.assertEqual(actual_ctags, [ + { + 'id': ctag1['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'function', + 'line': 133, + 'lang': 'Python', + } + ]) + + # 2. when + actual_ctags = list(self.storage.content_ctags_search( + 'hello', + limit=1, + last_sha1=ctag1['id'])) + + # 2. then + self.assertEqual(actual_ctags, [ + { + 'id': ctag2['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'variable', + 'line': 100, + 'lang': 'C', + } + ]) + + # 3. when + actual_ctags = list(self.storage.content_ctags_search('hello')) + + # 3. then + self.assertEqual(actual_ctags, [ + { + 'id': ctag1['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'function', + 'line': 133, + 'lang': 'Python', + }, + { + 'id': ctag2['id'], + 'tool': tool, + 'name': 'hello', + 'kind': 'variable', + 'line': 100, + 'lang': 'C', + }, + ]) + + # 4. when + actual_ctags = list(self.storage.content_ctags_search('counter')) + + # then + self.assertEqual(actual_ctags, [{ + 'id': ctag1['id'], + 'tool': tool, + 'name': 'counter', + 'kind': 'variable', + 'line': 119, + 'lang': 'Python', + }]) + + @istest + def content_ctags_search_no_result(self): + actual_ctags = list(self.storage.content_ctags_search('counter')) + + self.assertEquals(actual_ctags, []) + + @istest + def content_ctags_add__add_new_ctags_added(self): + # given + tools = self.fetch_tools() + tool = tools['universal-ctags'] + tool_id = tool['id'] + + ctag_v1 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [{ + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + }] + } + + # given + self.storage.content_ctags_add([ctag_v1]) + self.storage.content_ctags_add([ctag_v1]) # conflict does nothing + + # when + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + # then + expected_ctags = [{ + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool, + }] + + self.assertEqual(actual_ctags, expected_ctags) + + # given + ctag_v2 = ctag_v1.copy() + ctag_v2.update({ + 'ctags': [ + { + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + } + ] + }) + + self.storage.content_ctags_add([ctag_v2]) + + expected_ctags = [ + { + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool, + }, { + 'id': self.sha1_2, + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + 'tool': tool, + } + ] + + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + self.assertEqual(actual_ctags, expected_ctags) + + @istest + def content_ctags_add__update_in_place(self): + # given + tools = self.fetch_tools() + tool = tools['universal-ctags'] + tool_id = tool['id'] + + ctag_v1 = { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + 'ctags': [{ + 'name': 'done', + 'kind': 'variable', 
+ 'line': 100, + 'lang': 'Scheme', + }] + } + + # given + self.storage.content_ctags_add([ctag_v1]) + + # when + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + # then + expected_ctags = [ + { + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool + } + ] + self.assertEqual(actual_ctags, expected_ctags) + + # given + ctag_v2 = ctag_v1.copy() + ctag_v2.update({ + 'ctags': [ + { + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + }, + { + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + } + ] + }) + + self.storage.content_ctags_add([ctag_v2], conflict_update=True) + + actual_ctags = list(self.storage.content_ctags_get( + [self.sha1_2])) + + # ctag did change as the v2 was used to overwrite v1 + expected_ctags = [ + { + 'id': self.sha1_2, + 'name': 'done', + 'kind': 'variable', + 'line': 100, + 'lang': 'Scheme', + 'tool': tool, + }, + { + 'id': self.sha1_2, + 'name': 'defn', + 'kind': 'function', + 'line': 120, + 'lang': 'Scheme', + 'tool': tool, + } + ] + self.assertEqual(actual_ctags, expected_ctags) + + @istest + def content_fossology_license_get(self): + # given + tools = self.fetch_tools() + tool = tools['nomos'] + tool_id = tool['id'] + + license1 = { + 'id': self.sha1_1, + 'licenses': ['GPL-2.0+'], + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_fossology_license_add([license1]) + + # then + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_2, self.sha1_1])) + + expected_license = { + 'id': self.sha1_1, + 'licenses': ['GPL-2.0+'], + 'tool': tool, + } + + # then + self.assertEqual(actual_licenses, [expected_license]) + + @istest + def content_fossology_license_add__new_license_added(self): + # given + tools = self.fetch_tools() + tool = tools['nomos'] + tool_id = tool['id'] + + license_v1 = { + 'id': self.sha1_1, + 'licenses': ['Apache-2.0'], + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_fossology_license_add([license_v1]) + # conflict does nothing + self.storage.content_fossology_license_add([license_v1]) + + # when + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + # then + expected_license = { + 'id': self.sha1_1, + 'licenses': ['Apache-2.0'], + 'tool': tool, + } + self.assertEqual(actual_licenses, [expected_license]) + + # given + license_v2 = license_v1.copy() + license_v2.update({ + 'licenses': ['BSD-2-Clause'], + }) + + self.storage.content_fossology_license_add([license_v2]) + + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + expected_license.update({ + 'licenses': ['Apache-2.0', 'BSD-2-Clause'], + }) + + # license did not change as the v2 was dropped. 
+ self.assertEqual(actual_licenses, [expected_license]) + + @istest + def content_fossology_license_add__update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool = tools['nomos'] + tool_id = tool['id'] + + license_v1 = { + 'id': self.sha1_1, + 'licenses': ['CECILL'], + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_fossology_license_add([license_v1]) + # conflict does nothing + self.storage.content_fossology_license_add([license_v1]) + + # when + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + # then + expected_license = { + 'id': self.sha1_1, + 'licenses': ['CECILL'], + 'tool': tool, + } + self.assertEqual(actual_licenses, [expected_license]) + + # given + license_v2 = license_v1.copy() + license_v2.update({ + 'licenses': ['CECILL-2.0'] + }) + + self.storage.content_fossology_license_add([license_v2], + conflict_update=True) + + actual_licenses = list(self.storage.content_fossology_license_get( + [self.sha1_1])) + + # license did change as the v2 was used to overwrite v1 + expected_license.update({ + 'licenses': ['CECILL-2.0'] + }) + self.assertEqual(actual_licenses, [expected_license]) + + @istest + def content_metadata_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadatas = [ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.sha1_1, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = list(self.storage.content_metadata_missing(metadatas)) + + # then + self.assertEqual(list(actual_missing), [ + self.sha1_2, + self.sha1_1, + ]) + + # given + self.storage.content_metadata_add([{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'codeRepository': { + 'type': 'git', + 'url': 'https://github.com/moranegg/metadata_test' + }, + 'description': 'Simple package.json test for indexer', + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id + }]) + + # when + actual_missing = list(self.storage.content_metadata_missing(metadatas)) + + # then + self.assertEqual(actual_missing, [self.sha1_1]) + + @istest + def content_metadata_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadata1 = { + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'codeRepository': { + 'type': 'git', + 'url': 'https://github.com/moranegg/metadata_test' + }, + 'description': 'Simple package.json test for indexer', + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id, + } + + # when + self.storage.content_metadata_add([metadata1]) + + # then + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2, self.sha1_1])) + + expected_metadatas = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'codeRepository': { + 'type': 'git', + 'url': 'https://github.com/moranegg/metadata_test' + }, + 'description': 'Simple package.json test for indexer', + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas) + + @istest + def content_metadata_add_drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadata_v1 = { + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id, + } + + # 
given + self.storage.content_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) + + expected_metadatas_v1 = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + metadata_v2.update({ + 'translated_metadata': { + 'other': {}, + 'name': 'test_drop_duplicated_metadata', + 'version': '0.0.1' + }, + }) + + self.storage.content_metadata_add([metadata_v2]) + + # then + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) + + # metadata did not change as the v2 was dropped. + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + @istest + def content_metadata_add_update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-translator']['id'] + + metadata_v1 = { + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.content_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) + + # then + expected_metadatas_v1 = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + metadata_v2.update({ + 'translated_metadata': { + 'other': {}, + 'name': 'test_update_duplicated_metadata', + 'version': '0.0.1' + }, + }) + self.storage.content_metadata_add([metadata_v2], conflict_update=True) + + actual_metadatas = list(self.storage.content_metadata_get( + [self.sha1_2])) +
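+        # Summary of the two conflict policies this pair of tests exercises
+        # (assumed semantics, inferred from the expectations around them):
+        #   content_metadata_add([v2])                       -> v2 dropped, v1 kept
+        #   content_metadata_add([v2], conflict_update=True) -> v1 overwritten by v2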
+ expected_metadatas_v2 = [{ + 'id': self.sha1_2, + 'translated_metadata': { + 'other': {}, + 'name': 'test_update_duplicated_metadata', + 'version': '0.0.1' + }, + 'tool': tools['swh-metadata-translator'] + }] + + # metadata did change as the v2 was used to overwrite v1 + self.assertEqual(actual_metadatas, expected_metadatas_v2) + + @istest + def revision_metadata_missing(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadatas = [ + { + 'id': self.revision_id_1, + 'indexer_configuration_id': tool_id, + }, + { + 'id': self.revision_id_2, + 'indexer_configuration_id': tool_id, + } + ] + + # when + actual_missing = list(self.storage.revision_metadata_missing( + metadatas)) + + # then + self.assertEqual(list(actual_missing), [ + self.revision_id_1, + self.revision_id_2, + ]) + + # given + self.storage.revision_metadata_add([{ + 'id': self.revision_id_1, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id + }]) + + # when + actual_missing = list(self.storage.revision_metadata_missing( + metadatas)) + + # then + self.assertEqual(actual_missing, [self.revision_id_2]) + + @istest + def revision_metadata_get(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadata_rev = { + 'id': self.revision_id_2, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id + } + + # when + self.storage.revision_metadata_add([metadata_rev]) + + # then + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_2, self.revision_id_1])) + + expected_metadatas = [{ + 'id': self.revision_id_2, + 'translated_metadata': metadata_rev['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas) + + @istest + def revision_metadata_add_drop_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadata_v1 = { + 'id': self.revision_id_1, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.revision_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_1])) + + expected_metadatas_v1 = [{ + 'id': self.revision_id_1, + 'translated_metadata': metadata_v1['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + 
metadata_v2.update({ + 'translated_metadata': { + 'name': 'test_metadata', + 'author': 'MG', + }, + }) + + self.storage.revision_metadata_add([metadata_v2]) + + # then + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_1])) + + # metadata did not change as the v2 was dropped. + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + @istest + def revision_metadata_add_update_in_place_duplicate(self): + # given + tools = self.fetch_tools() + tool_id = tools['swh-metadata-detector']['id'] + + metadata_v1 = { + 'id': self.revision_id_2, + 'translated_metadata': { + 'developmentStatus': None, + 'version': None, + 'operatingSystem': None, + 'description': None, + 'keywords': None, + 'issueTracker': None, + 'name': None, + 'author': None, + 'relatedLink': None, + 'url': None, + 'type': None, + 'license': None, + 'maintainer': None, + 'email': None, + 'softwareRequirements': None, + 'identifier': None + }, + 'indexer_configuration_id': tool_id, + } + + # given + self.storage.revision_metadata_add([metadata_v1]) + + # when + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_2])) + + # then + expected_metadatas_v1 = [{ + 'id': self.revision_id_2, + 'translated_metadata': metadata_v1['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + self.assertEqual(actual_metadatas, expected_metadatas_v1) + + # given + metadata_v2 = metadata_v1.copy() + metadata_v2.update({ + 'translated_metadata': { + 'name': 'test_update_duplicated_metadata', + 'author': 'MG' + }, + }) + self.storage.revision_metadata_add([metadata_v2], conflict_update=True) + + actual_metadatas = list(self.storage.revision_metadata_get( + [self.revision_id_2])) + + expected_metadatas_v2 = [{ + 'id': self.revision_id_2, + 'translated_metadata': metadata_v2['translated_metadata'], + 'tool': tools['swh-metadata-detector'] + }] + + # metadata did change as the v2 was used to overwrite v1 + self.assertEqual(actual_metadatas, expected_metadatas_v2) + + @istest + def indexer_configuration_add(self): + tool = { + 'tool_name': 'some-unknown-tool', + 'tool_version': 'some-version', + 'tool_configuration': {"debian-package": "some-package"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + self.assertIsNone(actual_tool) # does not exist + + # add it + actual_tools = list(self.storage.indexer_configuration_add([tool])) + + self.assertEqual(len(actual_tools), 1) + actual_tool = actual_tools[0] + self.assertIsNotNone(actual_tool) # now it exists + new_id = actual_tool.pop('id') + self.assertEqual(actual_tool, tool) + + actual_tools2 = list(self.storage.indexer_configuration_add([tool])) + actual_tool2 = actual_tools2[0] + self.assertIsNotNone(actual_tool2) # now it exists + new_id2 = actual_tool2.pop('id') + + self.assertEqual(new_id, new_id2) + self.assertEqual(actual_tool, actual_tool2) + + @istest + def indexer_configuration_add_multiple(self): + tool = { + 'tool_name': 'some-unknown-tool', + 'tool_version': 'some-version', + 'tool_configuration': {"debian-package": "some-package"}, + } + + actual_tools = list(self.storage.indexer_configuration_add([tool])) + self.assertEqual(len(actual_tools), 1) + + new_tools = [tool, { + 'tool_name': 'yet-another-tool', + 'tool_version': 'version', + 'tool_configuration': {}, + }] + + actual_tools = list(self.storage.indexer_configuration_add(new_tools)) + self.assertEqual(len(actual_tools), 2) + + # order not guaranteed, so we iterate over results to check
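+        # (each returned tool should carry a freshly assigned, non-None id
+        # and, once that id is popped, equal one of the submitted dicts; an
+        # assertCountEqual on id-stripped results would be an equivalent check)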
for tool in actual_tools: + _id = tool.pop('id') + self.assertIsNotNone(_id) + self.assertIn(tool, new_tools) + + @istest + def indexer_configuration_get_missing(self): + tool = { + 'tool_name': 'unknown-tool', + 'tool_version': '3.1.0rc2-31-ga2cbb8c', + 'tool_configuration': {"command_line": "nomossa <filepath>"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + self.assertIsNone(actual_tool) + + @istest + def indexer_configuration_get(self): + tool = { + 'tool_name': 'nomos', + 'tool_version': '3.1.0rc2-31-ga2cbb8c', + 'tool_configuration': {"command_line": "nomossa <filepath>"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + expected_tool = tool.copy() + expected_tool['id'] = 1 + + self.assertEqual(expected_tool, actual_tool) + + @istest + def indexer_configuration_metadata_get_missing_context(self): + tool = { + 'tool_name': 'swh-metadata-translator', + 'tool_version': '0.0.1', + 'tool_configuration': {"context": "unknown-context"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + self.assertIsNone(actual_tool) + + @istest + def indexer_configuration_metadata_get(self): + tool = { + 'tool_name': 'swh-metadata-translator', + 'tool_version': '0.0.1', + 'tool_configuration': {"type": "local", "context": "npm"}, + } + + actual_tool = self.storage.indexer_configuration_get(tool) + + expected_tool = tool.copy() + expected_tool['id'] = actual_tool['id'] + + self.assertEqual(expected_tool, actual_tool) diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py index 3626af8..f7599de 100644 --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -1,253 +1,303 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import pathlib + from swh.objstorage.exc import ObjNotFoundError +from swh.indexer import get_storage + + +class StorageTestFixture: + """Mix this in a test subject class to get Storage testing support. + + This fixture must come before DbTestFixture in the inheritance list, + as it uses DbTestFixture's methods to set up its own internal database. + + Usage example: + + class TestStorage(StorageTestFixture, DbTestFixture): + ... + """ + TEST_STORAGE_DB_NAME = 'softwareheritage-test-indexer' + + @classmethod + def setUpClass(cls): + if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'): + raise RuntimeError("StorageTestFixture needs to be followed by " + "DbTestFixture in the inheritance list.") + + test_dir = pathlib.Path(__file__).absolute().parent + test_data_dir = test_dir / '../../../../swh-storage-testdata' + test_db_dump = (test_data_dir / 'dumps/swh-indexer.dump').absolute() + cls.add_db(cls.TEST_STORAGE_DB_NAME, str(test_db_dump), 'pg_dump') + super().setUpClass() + + def setUp(self): + super().setUp() + + self.storage_config = { + 'cls': 'local', + 'args': { + 'db': self.test_db[self.TEST_STORAGE_DB_NAME].conn, + }, + } + self.storage = get_storage(**self.storage_config) + + def tearDown(self): + super().tearDown() + + def reset_storage_tables(self): + excluded = {'indexer_configuration'} + self.reset_db_tables(self.TEST_STORAGE_DB_NAME, excluded=excluded) + + db = self.test_db[self.TEST_STORAGE_DB_NAME] + db.conn.commit() class MockObjStorage: """Mock objstorage with predefined contents.
""" data = {} def __init__(self): self.data = { '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text', '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text', '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text', '02fb2c89e14f7fab46701478c83779c7beb7b069': b""" import unittest import logging from nose.tools import istest from swh.indexer.mimetype import ContentMimetypeIndexer from swh.indexer.tests.test_utils import MockObjStorage class MockStorage(): def content_mimetype_add(self, mimetypes): self.state = mimetypes self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 10, }] """, '103bc087db1d26afc3a0283f38663d081e9b01e6': b""" #ifndef __AVL__ #define __AVL__ typedef struct _avl_tree avl_tree; typedef struct _data_t { int content; } data_t; """, '93666f74f1cf635c8c8ac118879da6ec5623c410': b""" (should 'pygments (recognize 'lisp 'easily)) """, '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """, 'd4c647f0fc257591cc9ba1722484229780d1c607': b""" { "version": "5.0.3", "name": "npm", "description": "a package manager for JavaScript", "keywords": [ "install", "modules", "package manager", "package.json" ], "preferGlobal": true, "config": { "publishtest": false }, "homepage": "https://docs.npmjs.com/", "author": "Isaac Z. Schlueter (http://blog.izs.me)", "repository": { "type": "git", "url": "https://github.com/npm/npm" }, "bugs": { "url": "https://github.com/npm/npm/issues" }, "dependencies": { "JSONStream": "~1.3.1", "abbrev": "~1.1.0", "ansi-regex": "~2.1.1", "ansicolors": "~0.3.2", "ansistyles": "~0.1.3" }, "devDependencies": { "tacks": "~1.2.6", "tap": "~10.3.2" }, "license": "Artistic-2.0" } """, 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" """, 'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', } def __iter__(self): yield from self.data.keys() def __contains__(self, sha1): return self.data.get(sha1) is not None def get(self, sha1): raw_content = self.data.get(sha1) if raw_content is None: raise ObjNotFoundError(sha1) return raw_content class MockStorage(): """Mock storage to simplify reading indexers' outputs. 
""" def content_metadata_missing(self, sha1s): yield from [] def content_metadata_add(self, metadata, conflict_update=None): self.state = metadata self.conflict_update = conflict_update def revision_metadata_add(self, metadata, conflict_update=None): self.state = metadata self.conflict_update = conflict_update def indexer_configuration_add(self, tools): tool = tools[0] if tool['tool_name'] == 'swh-metadata-translator': return [{ 'id': 30, 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] elif tool['tool_name'] == 'swh-metadata-detector': return [{ 'id': 7, 'tool_name': 'swh-metadata-detector', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] def revision_get(self, revisions): return [{ 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'committer': { 'id': 26, 'name': b'Andrew Nesbitt', 'fullname': b'Andrew Nesbitt ', 'email': b'andrewnez@gmail.com' }, 'synthetic': False, 'date': { 'negative_utc': False, 'timestamp': { 'seconds': 1487596456, 'microseconds': 0 }, 'offset': 0 }, 'directory': b'10' }] def directory_ls(self, directory, recursive=False, cur=None): # with directory: b'\x9d', return [{ 'sha1_git': b'abc', 'name': b'index.js', 'target': b'abc', 'length': 897, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'bcd' }, { 'sha1_git': b'aab', 'name': b'package.json', 'target': b'aab', 'length': 712, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'cde' }, { 'dir_id': b'10', 'target': b'11', 'type': 'dir', 'length': None, 'name': b'.github', 'sha1': None, 'perms': 16384, 'sha1_git': None, 'status': None, 'sha256': None }] def content_metadata_get(self, sha1s): return [{ 'tool': { 'configuration': { 'type': 'local', 'context': 'npm' }, 'version': '0.0.1', 'id': 6, 'name': 'swh-metadata-translator' }, 'id': b'cde', 'translated_metadata': { 'issueTracker': { 'url': 'https://github.com/librariesio/yarn-parser/issues' }, 'version': '1.0.0', 'name': 'yarn-parser', 'author': 'Andrew Nesbitt', 'url': 'https://github.com/librariesio/yarn-parser#readme', 'processorRequirements': {'node': '7.5'}, 'other': { 'scripts': { 'start': 'node index.js' }, 'main': 'index.js' }, 'license': 'AGPL-3.0', 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], 'codeRepository': { 'type': 'git', 'url': 'git+https://github.com/librariesio/yarn-parser.git' }, 'description': 'Tiny web service for parsing yarn.lock files', 'softwareRequirements': { 'yarn': '^0.21.0', 'express': '^4.14.0', 'body-parser': '^1.15.2'} } }]