diff --git a/requirements-swh.txt b/requirements-swh.txt
index 330716a..c49aa91 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,6 +1,6 @@
-swh.core[db,http] >= 0.0.65
+swh.core[db,http] >= 0.0.87
 swh.model >= 0.0.15
 swh.objstorage >= 0.0.28
 swh.scheduler >= 0.0.47
 swh.storage >= 0.0.156
 swh.journal >= 0.0.17
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
index 4eaa7d5..8c61aff 100644
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -1,938 +1,451 @@
-# Copyright (C) 2015-2018 The Software Heritage developers
+# Copyright (C) 2015-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import json
 import psycopg2

 from collections import defaultdict

-from swh.core.api import remote_api_endpoint
 from swh.storage.common import db_transaction_generator, db_transaction
 from swh.storage.exc import StorageDBError

 from .db import Db

 from . import converters


 INDEXER_CFG_KEY = 'indexer_storage'


 MAPPING_NAMES = ['codemeta', 'gemspec', 'maven', 'npm', 'pkg-info']


 def get_indexer_storage(cls, args):
     """Get an indexer storage object of class `storage_class` with
     arguments `storage_args`.

     Args:
         cls (str): storage's class, either 'local' or 'remote'
         args (dict): dictionary of arguments passed to the
             storage class constructor

     Returns:
         an instance of swh.indexer's storage (either local or remote)

     Raises:
         ValueError if passed an unknown storage class.

     """
     if cls == 'remote':
         from .api.client import RemoteStorage as IndexerStorage
     elif cls == 'local':
         from . import IndexerStorage
     elif cls == 'memory':
         from .in_memory import IndexerStorage
     else:
         raise ValueError('Unknown indexer storage class `%s`' % cls)

     return IndexerStorage(**args)


 def _check_id_duplicates(data):
     """
     If any two dictionaries in `data` have the same id, raises
     a `ValueError`.

     Values associated to the key must be hashable.

     Args:
         data (List[dict]): List of dictionaries to be inserted

     >>> _check_id_duplicates([
     ...     {'id': 'foo', 'data': 'spam'},
     ...     {'id': 'bar', 'data': 'egg'},
     ... ])
     >>> _check_id_duplicates([
     ...     {'id': 'foo', 'data': 'spam'},
     ...     {'id': 'foo', 'data': 'egg'},
     ... ])
     Traceback (most recent call last):
       ...
     ValueError: The same id is present more than once.
""" if len({item['id'] for item in data}) < len(data): raise ValueError('The same id is present more than once.') class IndexerStorage: """SWH Indexer Storage """ def __init__(self, db, min_pool_conns=1, max_pool_conns=10): """ Args: db_conn: either a libpq connection string, or a psycopg2 connection """ try: if isinstance(db, psycopg2.extensions.connection): self._pool = None self._db = Db(db) else: self._pool = psycopg2.pool.ThreadedConnectionPool( min_pool_conns, max_pool_conns, db ) self._db = None except psycopg2.OperationalError as e: raise StorageDBError(e) def get_db(self): if self._db: return self._db return Db.from_pool(self._pool) def put_db(self, db): if db is not self._db: db.put_conn() - @remote_api_endpoint('check_config') @db_transaction() def check_config(self, *, check_write, db=None, cur=None): - """Check that the storage is configured and ready to go.""" # Check permissions on one of the tables if check_write: check = 'INSERT' else: check = 'SELECT' cur.execute( "select has_table_privilege(current_user, 'content_mimetype', %s)", # noqa (check,) ) return cur.fetchone()[0] - @remote_api_endpoint('content_mimetype/missing') @db_transaction_generator() def content_mimetype_missing(self, mimetypes, db=None, cur=None): - """Generate mimetypes missing from storage. - - Args: - mimetypes (iterable): iterable of dict with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute the - results - - Yields: - tuple (id, indexer_configuration_id): missing id - - """ for obj in db.content_mimetype_missing_from_list(mimetypes, cur): yield obj[0] def _content_get_range(self, content_type, start, end, indexer_configuration_id, limit=1000, with_textual_data=False, db=None, cur=None): - """Retrieve ids of type content_type within range [start, end] bound - by limit. - - Args: - **content_type** (str): content's type (mimetype, language, etc...) - **start** (bytes): Starting identifier range (expected smaller - than end) - **end** (bytes): Ending identifier range (expected larger - than start) - **indexer_configuration_id** (int): The tool used to index data - **limit** (int): Limit result (default to 1000) - **with_textual_data** (bool): Deal with only textual - content (True) or all - content (all contents by - defaults, False) - - Raises: - ValueError for; - - limit to None - - wrong content_type provided - - Returns: - a dict with keys: - - **ids** [bytes]: iterable of content ids within the range. - - **next** (Optional[bytes]): The next range of sha1 starts at - this sha1 if any - - """ if limit is None: raise ValueError('Development error: limit should not be None') if content_type not in db.content_indexer_names: err = 'Development error: Wrong type. Should be one of [%s]' % ( ','.join(db.content_indexer_names)) raise ValueError(err) ids = [] next_id = None for counter, obj in enumerate(db.content_get_range( content_type, start, end, indexer_configuration_id, limit=limit+1, with_textual_data=with_textual_data, cur=cur)): _id = obj[0] if counter >= limit: next_id = _id break ids.append(_id) return { 'ids': ids, 'next': next_id } - @remote_api_endpoint('content_mimetype/range') @db_transaction() def content_mimetype_get_range(self, start, end, indexer_configuration_id, limit=1000, db=None, cur=None): - """Retrieve mimetypes within range [start, end] bound by limit. 
- - Args: - **start** (bytes): Starting identifier range (expected smaller - than end) - **end** (bytes): Ending identifier range (expected larger - than start) - **indexer_configuration_id** (int): The tool used to index data - **limit** (int): Limit result (default to 1000) - - Raises: - ValueError for limit to None - - Returns: - a dict with keys: - - **ids** [bytes]: iterable of content ids within the range. - - **next** (Optional[bytes]): The next range of sha1 starts at - this sha1 if any - - """ return self._content_get_range('mimetype', start, end, indexer_configuration_id, limit=limit, db=db, cur=cur) - @remote_api_endpoint('content_mimetype/add') @db_transaction() def content_mimetype_add(self, mimetypes, conflict_update=False, db=None, cur=None): - """Add mimetypes not present in storage. - - Args: - mimetypes (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **mimetype** (bytes): raw content's mimetype - - **encoding** (bytes): raw content's encoding - - **indexer_configuration_id** (int): tool's id used to - compute the results - - **conflict_update** (bool): Flag to determine if we want to - overwrite (``True``) or skip duplicates (``False``, the - default) - - """ _check_id_duplicates(mimetypes) mimetypes.sort(key=lambda m: m['id']) db.mktemp_content_mimetype(cur) db.copy_to(mimetypes, 'tmp_content_mimetype', ['id', 'mimetype', 'encoding', 'indexer_configuration_id'], cur) db.content_mimetype_add_from_temp(conflict_update, cur) - @remote_api_endpoint('content_mimetype') @db_transaction_generator() def content_mimetype_get(self, ids, db=None, cur=None): - """Retrieve full content mimetype per ids. - - Args: - ids (iterable): sha1 identifier - - Yields: - mimetypes (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **mimetype** (bytes): raw content's mimetype - - **encoding** (bytes): raw content's encoding - - **tool** (dict): Tool used to compute the language - - """ for c in db.content_mimetype_get_from_list(ids, cur): yield converters.db_to_mimetype( dict(zip(db.content_mimetype_cols, c))) - @remote_api_endpoint('content_language/missing') @db_transaction_generator() def content_language_missing(self, languages, db=None, cur=None): - """List languages missing from storage. - - Args: - languages (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - an iterable of missing id for the tuple (id, - indexer_configuration_id) - - """ for obj in db.content_language_missing_from_list(languages, cur): yield obj[0] - @remote_api_endpoint('content_language') @db_transaction_generator() def content_language_get(self, ids, db=None, cur=None): - """Retrieve full content language per ids. - - Args: - ids (iterable): sha1 identifier - - Yields: - languages (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **lang** (bytes): raw content's language - - **tool** (dict): Tool used to compute the language - - """ for c in db.content_language_get_from_list(ids, cur): yield converters.db_to_language( dict(zip(db.content_language_cols, c))) - @remote_api_endpoint('content_language/add') @db_transaction() def content_language_add(self, languages, conflict_update=False, db=None, cur=None): - """Add languages not present in storage. 
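# Illustrative sketch of the cursor contract documented for
# content_mimetype_get_range: each call returns at most `limit` ids plus a
# 'next' sha1 to resume from, or None once the range is exhausted.
# `storage` and `tool` are assumed from the previous sketch.
start, end = b'\x00' * 20, b'\xff' * 20
while start is not None:
    page = storage.content_mimetype_get_range(start, end, tool['id'],
                                              limit=1000)
    for content_id in page['ids']:
        pass  # process one indexed content id here
    start = page['next']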
- - Args: - languages (iterable): dictionaries with keys: - - - **id** (bytes): sha1 - - **lang** (bytes): language detected - - conflict_update (bool): Flag to determine if we want to - overwrite (true) or skip duplicates (false, the - default) - - """ _check_id_duplicates(languages) languages.sort(key=lambda m: m['id']) db.mktemp_content_language(cur) # empty language is mapped to 'unknown' db.copy_to( ({ 'id': l['id'], 'lang': 'unknown' if not l['lang'] else l['lang'], 'indexer_configuration_id': l['indexer_configuration_id'], } for l in languages), 'tmp_content_language', ['id', 'lang', 'indexer_configuration_id'], cur) db.content_language_add_from_temp(conflict_update, cur) - @remote_api_endpoint('content/ctags/missing') @db_transaction_generator() def content_ctags_missing(self, ctags, db=None, cur=None): - """List ctags missing from storage. - - Args: - ctags (iterable): dicts with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - an iterable of missing id for the tuple (id, - indexer_configuration_id) - - """ for obj in db.content_ctags_missing_from_list(ctags, cur): yield obj[0] - @remote_api_endpoint('content/ctags') @db_transaction_generator() def content_ctags_get(self, ids, db=None, cur=None): - """Retrieve ctags per id. - - Args: - ids (iterable): sha1 checksums - - Yields: - Dictionaries with keys: - - - **id** (bytes): content's identifier - - **name** (str): symbol's name - - **kind** (str): symbol's kind - - **lang** (str): language for that content - - **tool** (dict): tool used to compute the ctags' info - - - """ for c in db.content_ctags_get_from_list(ids, cur): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c))) - @remote_api_endpoint('content/ctags/add') @db_transaction() def content_ctags_add(self, ctags, conflict_update=False, db=None, cur=None): - """Add ctags not present in storage - - Args: - ctags (iterable): dictionaries with keys: - - - **id** (bytes): sha1 - - **ctags** ([list): List of dictionary with keys: name, kind, - line, lang - - """ _check_id_duplicates(ctags) ctags.sort(key=lambda m: m['id']) def _convert_ctags(__ctags): """Convert ctags dict to list of ctags. """ for ctags in __ctags: yield from converters.ctags_to_db(ctags) db.mktemp_content_ctags(cur) db.copy_to(list(_convert_ctags(ctags)), tblname='tmp_content_ctags', columns=['id', 'name', 'kind', 'line', 'lang', 'indexer_configuration_id'], cur=cur) db.content_ctags_add_from_temp(conflict_update, cur) - @remote_api_endpoint('content/ctags/search') @db_transaction_generator() def content_ctags_search(self, expression, limit=10, last_sha1=None, db=None, cur=None): - """Search through content's raw ctags symbols. - - Args: - expression (str): Expression to search for - limit (int): Number of rows to return (default to 10). - last_sha1 (str): Offset from which retrieving data (default to ''). - - Yields: - rows of ctags including id, name, lang, kind, line, etc... - - """ for obj in db.content_ctags_search(expression, last_sha1, limit, cur=cur): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj))) - @remote_api_endpoint('content/fossology_license') @db_transaction_generator() def content_fossology_license_get(self, ids, db=None, cur=None): - """Retrieve licenses per id. 
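# Illustrative sketch for the ctags endpoints: symbols are attached to a
# content sha1 and can be searched back by exact symbol name. The symbols
# are invented; `storage`, `tool` and `sha1` come from the first sketch.
storage.content_ctags_add([{
    'id': sha1,
    'indexer_configuration_id': tool['id'],
    'ctags': [
        {'name': 'main', 'kind': 'function', 'line': 12, 'lang': 'C'},
        {'name': 'usage', 'kind': 'function', 'line': 45, 'lang': 'C'},
    ],
}])
for hit in storage.content_ctags_search('main', limit=10):
    print(hit['id'].hex(), hit['name'], hit['kind'], hit['line'], hit['lang'])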
- - Args: - ids (iterable): sha1 checksums - - Yields: - dict: ``{id: facts}`` where ``facts`` is a dict with the - following keys: - - - **licenses** ([str]): associated licenses for that content - - **tool** (dict): Tool used to compute the license - - """ d = defaultdict(list) for c in db.content_fossology_license_get_from_list(ids, cur): license = dict(zip(db.content_fossology_license_cols, c)) id_ = license['id'] d[id_].append(converters.db_to_fossology_license(license)) for id_, facts in d.items(): yield {id_: facts} - @remote_api_endpoint('content/fossology_license/add') @db_transaction() def content_fossology_license_add(self, licenses, conflict_update=False, db=None, cur=None): - """Add licenses not present in storage. - - Args: - licenses (iterable): dictionaries with keys: - - - **id**: sha1 - - **licenses** ([bytes]): List of licenses associated to sha1 - - **tool** (str): nomossa - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - Returns: - list: content_license entries which failed due to unknown licenses - - """ _check_id_duplicates(licenses) licenses.sort(key=lambda m: m['id']) db.mktemp_content_fossology_license(cur) db.copy_to( ({ 'id': sha1['id'], 'indexer_configuration_id': sha1['indexer_configuration_id'], 'license': license, } for sha1 in licenses for license in sha1['licenses']), tblname='tmp_content_fossology_license', columns=['id', 'license', 'indexer_configuration_id'], cur=cur) db.content_fossology_license_add_from_temp(conflict_update, cur) - @remote_api_endpoint('content/fossology_license/range') @db_transaction() def content_fossology_license_get_range( self, start, end, indexer_configuration_id, limit=1000, db=None, cur=None): - """Retrieve licenses within range [start, end] bound by limit. - - Args: - **start** (bytes): Starting identifier range (expected smaller - than end) - **end** (bytes): Ending identifier range (expected larger - than start) - **indexer_configuration_id** (int): The tool used to index data - **limit** (int): Limit result (default to 1000) - - Raises: - ValueError for limit to None - - Returns: - a dict with keys: - - **ids** [bytes]: iterable of content ids within the range. - - **next** (Optional[bytes]): The next range of sha1 starts at - this sha1 if any - - """ return self._content_get_range('fossology_license', start, end, indexer_configuration_id, limit=limit, with_textual_data=True, db=db, cur=cur) - @remote_api_endpoint('content_metadata/missing') @db_transaction_generator() def content_metadata_missing(self, metadata, db=None, cur=None): - """List metadata missing from storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - missing sha1s - - """ for obj in db.content_metadata_missing_from_list(metadata, cur): yield obj[0] - @remote_api_endpoint('content_metadata') @db_transaction_generator() def content_metadata_get(self, ids, db=None, cur=None): - """Retrieve metadata per id. 
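# Illustrative sketch for the fossology license endpoints: licenses are
# stored as a list per (content, tool) pair and read back grouped by content
# id, as the docstring describes. The license names are placeholders;
# `storage`, `tool` and `sha1` come from the first sketch.
storage.content_fossology_license_add([{
    'id': sha1,
    'licenses': ['GPL-3.0-or-later', 'MIT'],
    'indexer_configuration_id': tool['id'],
}])
for row in storage.content_fossology_license_get([sha1]):
    for content_id, facts in row.items():
        for fact in facts:
            print(content_id.hex(), fact['licenses'], fact['tool']['name'])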
- - Args: - ids (iterable): sha1 checksums - - Yields: - dictionaries with the following keys: - - id (bytes) - metadata (str): associated metadata - tool (dict): tool used to compute metadata - - """ for c in db.content_metadata_get_from_list(ids, cur): yield converters.db_to_metadata( dict(zip(db.content_metadata_cols, c))) - @remote_api_endpoint('content_metadata/add') @db_transaction() def content_metadata_add(self, metadata, conflict_update=False, db=None, cur=None): - """Add metadata not present in storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id**: sha1 - - **metadata**: arbitrary dict - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - """ _check_id_duplicates(metadata) metadata.sort(key=lambda m: m['id']) db.mktemp_content_metadata(cur) db.copy_to(metadata, 'tmp_content_metadata', ['id', 'metadata', 'indexer_configuration_id'], cur) db.content_metadata_add_from_temp(conflict_update, cur) - @remote_api_endpoint('revision_intrinsic_metadata/missing') @db_transaction_generator() def revision_intrinsic_metadata_missing(self, metadata, db=None, cur=None): - """List metadata missing from storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id** (bytes): sha1_git revision identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - missing ids - - """ for obj in db.revision_intrinsic_metadata_missing_from_list( metadata, cur): yield obj[0] - @remote_api_endpoint('revision_intrinsic_metadata') @db_transaction_generator() def revision_intrinsic_metadata_get(self, ids, db=None, cur=None): - """Retrieve revision metadata per id. - - Args: - ids (iterable): sha1 checksums - - Yields: - : dictionaries with the following keys: - - - **id** (bytes) - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ for c in db.revision_intrinsic_metadata_get_from_list(ids, cur): yield converters.db_to_metadata( dict(zip(db.revision_intrinsic_metadata_cols, c))) - @remote_api_endpoint('revision_intrinsic_metadata/add') @db_transaction() def revision_intrinsic_metadata_add(self, metadata, conflict_update=False, db=None, cur=None): - """Add metadata not present in storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id**: sha1_git of revision - - **metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - """ _check_id_duplicates(metadata) metadata.sort(key=lambda m: m['id']) db.mktemp_revision_intrinsic_metadata(cur) db.copy_to(metadata, 'tmp_revision_intrinsic_metadata', ['id', 'metadata', 'mappings', 'indexer_configuration_id'], cur) db.revision_intrinsic_metadata_add_from_temp(conflict_update, cur) - @remote_api_endpoint('revision_intrinsic_metadata/delete') @db_transaction() def revision_intrinsic_metadata_delete(self, entries, db=None, cur=None): - """Remove revision metadata from the storage. 
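# Illustrative sketch for revision intrinsic metadata: each row records the
# metadata dict together with the mappings that produced it. The revision id
# and metadata values are placeholders; `storage` and `tool` come from the
# first sketch.
rev_id = bytes.fromhex('7026b7c1a2af56521e951c01ed20f255fa054238')
storage.revision_intrinsic_metadata_add([{
    'id': rev_id,
    'metadata': {'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                 'name': 'example-project'},
    'mappings': ['npm'],
    'indexer_configuration_id': tool['id'],
}])
print(list(storage.revision_intrinsic_metadata_get([rev_id])))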
- - Args: - entries (dict): dictionaries with the following keys: - - - **id** (bytes): revision identifier - - **indexer_configuration_id** (int): tool used to compute - metadata - """ db.revision_intrinsic_metadata_delete(entries, cur) - @remote_api_endpoint('origin_intrinsic_metadata') @db_transaction_generator() def origin_intrinsic_metadata_get(self, ids, db=None, cur=None): - """Retrieve origin metadata per id. - - Args: - ids (iterable): origin identifiers - - Yields: - list: dictionaries with the following keys: - - - **id** (str): origin url - - **from_revision** (bytes): which revision this metadata - was extracted from - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ for c in db.origin_intrinsic_metadata_get_from_list(ids, cur): yield converters.db_to_metadata( dict(zip(db.origin_intrinsic_metadata_cols, c))) - @remote_api_endpoint('origin_intrinsic_metadata/add') @db_transaction() def origin_intrinsic_metadata_add(self, metadata, conflict_update=False, db=None, cur=None): - """Add origin metadata not present in storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id**: origin urls - - **from_revision**: sha1 id of the revision used to generate - these metadata. - - **metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - """ _check_id_duplicates(metadata) metadata.sort(key=lambda m: m['id']) db.mktemp_origin_intrinsic_metadata(cur) db.copy_to(metadata, 'tmp_origin_intrinsic_metadata', ['id', 'metadata', 'indexer_configuration_id', 'from_revision', 'mappings'], cur) db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur) - @remote_api_endpoint('origin_intrinsic_metadata/delete') @db_transaction() def origin_intrinsic_metadata_delete( self, entries, db=None, cur=None): - """Remove origin metadata from the storage. - - Args: - entries (dict): dictionaries with the following keys: - - - **id** (str): origin urls - - **indexer_configuration_id** (int): tool used to compute - metadata - """ db.origin_intrinsic_metadata_delete(entries, cur) - @remote_api_endpoint('origin_intrinsic_metadata/search/fulltext') @db_transaction_generator() def origin_intrinsic_metadata_search_fulltext( self, conjunction, limit=100, db=None, cur=None): - """Returns the list of origins whose metadata contain all the terms. - - Args: - conjunction (List[str]): List of terms to be searched for. - limit (int): The maximum number of results to return - - Yields: - list: dictionaries with the following keys: - - - **id** (str): origin urls - - **from_revision**: sha1 id of the revision used to generate - these metadata. 
- - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ for c in db.origin_intrinsic_metadata_search_fulltext( conjunction, limit=limit, cur=cur): yield converters.db_to_metadata( dict(zip(db.origin_intrinsic_metadata_cols, c))) - @remote_api_endpoint('origin_intrinsic_metadata/search/by_producer') @db_transaction() def origin_intrinsic_metadata_search_by_producer( self, page_token='', limit=100, ids_only=False, mappings=None, tool_ids=None, db=None, cur=None): - """Returns the list of origins whose metadata contain all the terms. - - Args: - page_token (str): Opaque token used for pagination. - limit (int): The maximum number of results to return - ids_only (bool): Determines whether only origin urls are - returned or the content as well - mappings (List[str]): Returns origins whose intrinsic metadata - were generated using at least one of these mappings. - - Returns: - dict: dict with the following keys: - - **next_page_token** (str, optional): opaque token to be used as - `page_token` for retrieving the next page. If absent, there is - no more pages to gather. - - **origins** (list): list of origin url (str) if `ids_only=True` - else dictionaries with the following keys: - - - **id** (str): origin urls - - **from_revision**: sha1 id of the revision used to generate - these metadata. - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ assert isinstance(page_token, str) # we go to limit+1 to check whether we should add next_page_token in # the response res = db.origin_intrinsic_metadata_search_by_producer( page_token, limit + 1, ids_only, mappings, tool_ids, cur) result = {} if ids_only: result['origins'] = [origin for (origin,) in res] if len(result['origins']) > limit: result['origins'][limit:] = [] result['next_page_token'] = result['origins'][-1] else: result['origins'] = [converters.db_to_metadata( dict(zip(db.origin_intrinsic_metadata_cols, c)))for c in res] if len(result['origins']) > limit: result['origins'][limit:] = [] result['next_page_token'] = result['origins'][-1]['id'] return result - @remote_api_endpoint('origin_intrinsic_metadata/stats') @db_transaction() def origin_intrinsic_metadata_stats( self, db=None, cur=None): - """Returns counts of indexed metadata per origins, broken down - into metadata types. - - Returns: - dict: dictionary with keys: - - - total (int): total number of origins that were indexed - (possibly yielding an empty metadata dictionary) - - non_empty (int): total number of origins that we extracted - a non-empty metadata dictionary from - - per_mapping (dict): a dictionary with mapping names as - keys and number of origins whose indexing used this - mapping. Note that indexing a given origin may use - 0, 1, or many mappings. 
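# Illustrative sketch for origin intrinsic metadata: one origin is added,
# then origins indexed with the 'npm' mapping are paged through using the
# opaque next_page_token contract described above. The origin url is a
# placeholder; `storage`, `tool` and `rev_id` come from earlier sketches.
storage.origin_intrinsic_metadata_add([{
    'id': 'https://github.com/example/example-project',
    'from_revision': rev_id,
    'metadata': {'name': 'example-project'},
    'mappings': ['npm'],
    'indexer_configuration_id': tool['id'],
}])
page_token = ''
while True:
    page = storage.origin_intrinsic_metadata_search_by_producer(
        page_token=page_token, limit=100, ids_only=True, mappings=['npm'])
    for origin_url in page['origins']:
        print(origin_url)
    page_token = page.get('next_page_token')
    if page_token is None:
        break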
- """ mapping_names = [m for m in MAPPING_NAMES] select_parts = [] # Count rows for each mapping for mapping_name in mapping_names: select_parts.append(( "sum(case when (mappings @> ARRAY['%s']) " " then 1 else 0 end)" ) % mapping_name) # Total select_parts.append("sum(1)") # Rows whose metadata has at least one key that is not '@context' select_parts.append( "sum(case when ('{}'::jsonb @> (metadata - '@context')) " " then 0 else 1 end)") cur.execute('select ' + ', '.join(select_parts) + ' from origin_intrinsic_metadata') results = dict(zip(mapping_names + ['total', 'non_empty'], cur.fetchone())) return { 'total': results.pop('total'), 'non_empty': results.pop('non_empty'), 'per_mapping': results, } - @remote_api_endpoint('indexer_configuration/add') @db_transaction_generator() def indexer_configuration_add(self, tools, db=None, cur=None): - """Add new tools to the storage. - - Args: - tools ([dict]): List of dictionary representing tool to - insert in the db. Dictionary with the following keys: - - - **tool_name** (str): tool's name - - **tool_version** (str): tool's version - - **tool_configuration** (dict): tool's configuration - (free form dict) - - Returns: - List of dict inserted in the db (holding the id key as - well). The order of the list is not guaranteed to match - the order of the initial list. - - """ db.mktemp_indexer_configuration(cur) db.copy_to(tools, 'tmp_indexer_configuration', ['tool_name', 'tool_version', 'tool_configuration'], cur) tools = db.indexer_configuration_add_from_temp(cur) for line in tools: yield dict(zip(db.indexer_configuration_cols, line)) - @remote_api_endpoint('indexer_configuration/data') @db_transaction() def indexer_configuration_get(self, tool, db=None, cur=None): - """Retrieve tool information. - - Args: - tool (dict): Dictionary representing a tool with the - following keys: - - - **tool_name** (str): tool's name - - **tool_version** (str): tool's version - - **tool_configuration** (dict): tool's configuration - (free form dict) - - Returns: - The same dictionary with an `id` key, None otherwise. - - """ tool_conf = tool['tool_configuration'] if isinstance(tool_conf, dict): tool_conf = json.dumps(tool_conf) idx = db.indexer_configuration_get(tool['tool_name'], tool['tool_version'], tool_conf) if not idx: return None return dict(zip(db.indexer_configuration_cols, idx)) diff --git a/swh/indexer/storage/api/client.py b/swh/indexer/storage/api/client.py index ec4c234..0e62adc 100644 --- a/swh/indexer/storage/api/client.py +++ b/swh/indexer/storage/api/client.py @@ -1,17 +1,17 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.core.api import RPCClient from swh.storage.exc import StorageAPIError -from .. 
import IndexerStorage +from ..interface import IndexerStorageInterface class RemoteStorage(RPCClient): """Proxy to a remote storage API""" - backend_class = IndexerStorage + backend_class = IndexerStorageInterface api_exception = StorageAPIError diff --git a/swh/indexer/storage/api/server.py b/swh/indexer/storage/api/server.py index 7edfec6..2b71f2b 100644 --- a/swh/indexer/storage/api/server.py +++ b/swh/indexer/storage/api/server.py @@ -1,106 +1,107 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging from swh.core import config from swh.core.api import (RPCServerApp, error_handler, encode_data_server as encode_data) from swh.indexer.storage import ( - get_indexer_storage, INDEXER_CFG_KEY, IndexerStorage + get_indexer_storage, INDEXER_CFG_KEY ) +from swh.indexer.storage.interface import IndexerStorageInterface def get_storage(): global storage if not storage: storage = get_indexer_storage(**app.config[INDEXER_CFG_KEY]) return storage app = RPCServerApp(__name__, - backend_class=IndexerStorage, + backend_class=IndexerStorageInterface, backend_factory=get_storage) storage = None @app.errorhandler(Exception) def my_error_handler(exception): return error_handler(exception, encode_data) @app.route('/') def index(): return 'SWH Indexer Storage API server' api_cfg = None def load_and_check_config(config_file, type='local'): """Check the minimal configuration is set to run the api or raise an error explanation. Args: config_file (str): Path to the configuration file to load type (str): configuration type. For 'local' type, more checks are done. Raises: Error if the setup is not as expected Returns: configuration as a dict """ if not config_file: raise EnvironmentError('Configuration file must be defined') if not os.path.exists(config_file): raise FileNotFoundError('Configuration file %s does not exist' % ( config_file, )) cfg = config.read(config_file) if 'indexer_storage' not in cfg: raise KeyError("Missing '%indexer_storage' configuration") if type == 'local': vcfg = cfg['indexer_storage'] cls = vcfg.get('cls') if cls != 'local': raise ValueError( "The indexer_storage backend can only be started with a " "'local' configuration") args = vcfg['args'] if not args.get('db'): raise ValueError( "Invalid configuration; missing 'db' config entry") return cfg def make_app_from_configfile(): """Run the WSGI app from the webserver, loading the configuration from a configuration file. SWH_CONFIG_FILENAME environment variable defines the configuration path to load. """ global api_cfg if not api_cfg: config_file = os.environ.get('SWH_CONFIG_FILENAME') api_cfg = load_and_check_config(config_file) app.config.update(api_cfg) handler = logging.StreamHandler() app.logger.addHandler(handler) return app if __name__ == '__main__': print('Deprecated. 
Use swh-indexer') diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py index dcb0cea..74a41a5 100644 --- a/swh/indexer/storage/in_memory.py +++ b/swh/indexer/storage/in_memory.py @@ -1,849 +1,421 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import bisect from collections import defaultdict, Counter import itertools import json import operator import math import re from . import MAPPING_NAMES SHA1_DIGEST_SIZE = 160 def _transform_tool(tool): return { 'id': tool['id'], 'name': tool['tool_name'], 'version': tool['tool_version'], 'configuration': tool['tool_configuration'], } class SubStorage: """Implements common missing/get/add logic for each indexer type.""" def __init__(self, tools): self._tools = tools self._sorted_ids = [] self._data = {} # map (id_, tool_id) -> metadata_dict self._tools_per_id = defaultdict(set) # map id_ -> Set[tool_id] def missing(self, ids): """List data missing from storage. Args: data (iterable): dictionaries with keys: - **id** (bytes): sha1 identifier - **indexer_configuration_id** (int): tool used to compute the results Yields: missing sha1s """ for id_ in ids: tool_id = id_['indexer_configuration_id'] id_ = id_['id'] if tool_id not in self._tools_per_id.get(id_, set()): yield id_ def get(self, ids): """Retrieve data per id. Args: ids (iterable): sha1 checksums Yields: dict: dictionaries with the following keys: - **id** (bytes) - **tool** (dict): tool used to compute metadata - arbitrary data (as provided to `add`) """ for id_ in ids: for tool_id in self._tools_per_id.get(id_, set()): key = (id_, tool_id) yield { 'id': id_, 'tool': _transform_tool(self._tools[tool_id]), **self._data[key], } def get_all(self): yield from self.get(self._sorted_ids) def get_range(self, start, end, indexer_configuration_id, limit): """Retrieve data within range [start, end] bound by limit. Args: **start** (bytes): Starting identifier range (expected smaller than end) **end** (bytes): Ending identifier range (expected larger than start) **indexer_configuration_id** (int): The tool used to index data **limit** (int): Limit result Raises: ValueError for limit to None Returns: a dict with keys: - **ids** [bytes]: iterable of content ids within the range. - **next** (Optional[bytes]): The next range of sha1 starts at this sha1 if any """ if limit is None: raise ValueError('Development error: limit should not be None') from_index = bisect.bisect_left(self._sorted_ids, start) to_index = bisect.bisect_right(self._sorted_ids, end, lo=from_index) if to_index - from_index >= limit: return { 'ids': self._sorted_ids[from_index:from_index+limit], 'next': self._sorted_ids[from_index+limit], } else: return { 'ids': self._sorted_ids[from_index:to_index], 'next': None, } def add(self, data, conflict_update): """Add data not present in storage. 
Args: data (iterable): dictionaries with keys: - **id**: sha1 - **indexer_configuration_id**: tool used to compute the results - arbitrary data conflict_update (bool): Flag to determine if we want to overwrite (true) or skip duplicates (false) """ data = list(data) if len({x['id'] for x in data}) < len(data): # For "exception-compatibility" with the pgsql backend raise ValueError('The same id is present more than once.') for item in data: item = item.copy() tool_id = item.pop('indexer_configuration_id') id_ = item.pop('id') data = item if not conflict_update and \ tool_id in self._tools_per_id.get(id_, set()): # Duplicate, should not be updated continue key = (id_, tool_id) self._data[key] = data self._tools_per_id[id_].add(tool_id) if id_ not in self._sorted_ids: bisect.insort(self._sorted_ids, id_) def add_merge(self, new_data, conflict_update, merged_key): for new_item in new_data: id_ = new_item['id'] tool_id = new_item['indexer_configuration_id'] if conflict_update: all_subitems = [] else: existing = list(self.get([id_])) all_subitems = [ old_subitem for existing_item in existing if existing_item['tool']['id'] == tool_id for old_subitem in existing_item[merged_key] ] for new_subitem in new_item[merged_key]: if new_subitem not in all_subitems: all_subitems.append(new_subitem) self.add([ { 'id': id_, 'indexer_configuration_id': tool_id, merged_key: all_subitems, } ], conflict_update=True) if id_ not in self._sorted_ids: bisect.insort(self._sorted_ids, id_) def delete(self, entries): for entry in entries: (id_, tool_id) = (entry['id'], entry['indexer_configuration_id']) key = (id_, tool_id) if tool_id in self._tools_per_id[id_]: self._tools_per_id[id_].remove(tool_id) if key in self._data: del self._data[key] class IndexerStorage: """In-memory SWH indexer storage.""" def __init__(self): self._tools = {} self._mimetypes = SubStorage(self._tools) self._languages = SubStorage(self._tools) self._content_ctags = SubStorage(self._tools) self._licenses = SubStorage(self._tools) self._content_metadata = SubStorage(self._tools) self._revision_intrinsic_metadata = SubStorage(self._tools) self._origin_intrinsic_metadata = SubStorage(self._tools) def check_config(self, *, check_write): return True def content_mimetype_missing(self, mimetypes): - """Generate mimetypes missing from storage. - - Args: - mimetypes (iterable): iterable of dict with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute the - results - - Yields: - tuple (id, indexer_configuration_id): missing id - - """ yield from self._mimetypes.missing(mimetypes) def content_mimetype_get_range( self, start, end, indexer_configuration_id, limit=1000): - """Retrieve mimetypes within range [start, end] bound by limit. - - Args: - **start** (bytes): Starting identifier range (expected smaller - than end) - **end** (bytes): Ending identifier range (expected larger - than start) - **indexer_configuration_id** (int): The tool used to index data - **limit** (int): Limit result (default to 1000) - - Raises: - ValueError for limit to None - - Returns: - a dict with keys: - - **ids** [bytes]: iterable of content ids within the range. - - **next** (Optional[bytes]): The next range of sha1 starts at - this sha1 if any - - """ return self._mimetypes.get_range( start, end, indexer_configuration_id, limit) def content_mimetype_add(self, mimetypes, conflict_update=False): - """Add mimetypes not present in storage. 
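# Illustrative sketch of the missing-then-add round trip an indexer typically
# performs against this in-memory backend: only ids not yet indexed by the
# given tool are (re)computed and written. `storage`, `tool` and `sha1` are
# assumed from the first sketch.
candidates = [{'id': sha1, 'indexer_configuration_id': tool['id']}]
todo = set(storage.content_mimetype_missing(candidates))
if sha1 in todo:
    storage.content_mimetype_add([{
        'id': sha1,
        'mimetype': b'text/x-python',
        'encoding': b'us-ascii',
        'indexer_configuration_id': tool['id'],
    }])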
- - Args: - mimetypes (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **mimetype** (bytes): raw content's mimetype - - **encoding** (bytes): raw content's encoding - - **indexer_configuration_id** (int): tool's id used to - compute the results - - **conflict_update** (bool): Flag to determine if we want to - overwrite (``True``) or skip duplicates (``False``, the - default) - - """ if not all(isinstance(x['id'], bytes) for x in mimetypes): raise TypeError('identifiers must be bytes.') self._mimetypes.add(mimetypes, conflict_update) - def content_mimetype_get(self, ids, db=None, cur=None): - """Retrieve full content mimetype per ids. - - Args: - ids (iterable): sha1 identifier - - Yields: - mimetypes (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **mimetype** (bytes): raw content's mimetype - - **encoding** (bytes): raw content's encoding - - **tool** (dict): Tool used to compute the language - - """ + def content_mimetype_get(self, ids): yield from self._mimetypes.get(ids) def content_language_missing(self, languages): - """List languages missing from storage. - - Args: - languages (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - an iterable of missing id for the tuple (id, - indexer_configuration_id) - - """ yield from self._languages.missing(languages) def content_language_get(self, ids): - """Retrieve full content language per ids. - - Args: - ids (iterable): sha1 identifier - - Yields: - languages (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **lang** (bytes): raw content's language - - **tool** (dict): Tool used to compute the language - - """ yield from self._languages.get(ids) def content_language_add(self, languages, conflict_update=False): - """Add languages not present in storage. - - Args: - languages (iterable): dictionaries with keys: - - - **id** (bytes): sha1 - - **lang** (bytes): language detected - - conflict_update (bool): Flag to determine if we want to - overwrite (true) or skip duplicates (false, the - default) - - """ if not all(isinstance(x['id'], bytes) for x in languages): raise TypeError('identifiers must be bytes.') self._languages.add(languages, conflict_update) def content_ctags_missing(self, ctags): - """List ctags missing from storage. - - Args: - ctags (iterable): dicts with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - an iterable of missing id for the tuple (id, - indexer_configuration_id) - - """ yield from self._content_ctags.missing(ctags) def content_ctags_get(self, ids): - """Retrieve ctags per id. 
- - Args: - ids (iterable): sha1 checksums - - Yields: - Dictionaries with keys: - - - **id** (bytes): content's identifier - - **name** (str): symbol's name - - **kind** (str): symbol's kind - - **lang** (str): language for that content - - **tool** (dict): tool used to compute the ctags' info - - - """ for item in self._content_ctags.get(ids): for item_ctags_item in item['ctags']: yield { 'id': item['id'], 'tool': item['tool'], **item_ctags_item } def content_ctags_add(self, ctags, conflict_update=False): - """Add ctags not present in storage - - Args: - ctags (iterable): dictionaries with keys: - - - **id** (bytes): sha1 - - **ctags** ([list): List of dictionary with keys: name, kind, - line, lang - - **indexer_configuration_id**: tool used to compute the - results - - """ if not all(isinstance(x['id'], bytes) for x in ctags): raise TypeError('identifiers must be bytes.') self._content_ctags.add_merge(ctags, conflict_update, 'ctags') def content_ctags_search(self, expression, - limit=10, last_sha1=None, db=None, cur=None): - """Search through content's raw ctags symbols. - - Args: - expression (str): Expression to search for - limit (int): Number of rows to return (default to 10). - last_sha1 (str): Offset from which retrieving data (default to ''). - - Yields: - rows of ctags including id, name, lang, kind, line, etc... - - """ + limit=10, last_sha1=None): nb_matches = 0 for ((id_, tool_id), item) in \ sorted(self._content_ctags._data.items()): if id_ <= (last_sha1 or bytes(0 for _ in range(SHA1_DIGEST_SIZE))): continue for ctags_item in item['ctags']: if ctags_item['name'] != expression: continue nb_matches += 1 yield { 'id': id_, 'tool': _transform_tool(self._tools[tool_id]), **ctags_item } if nb_matches >= limit: return def content_fossology_license_get(self, ids): - """Retrieve licenses per id. - - Args: - ids (iterable): sha1 checksums - - Yields: - dict: ``{id: facts}`` where ``facts`` is a dict with the - following keys: - - - **licenses** ([str]): associated licenses for that content - - **tool** (dict): Tool used to compute the license - - """ # Rewrites the output of SubStorage.get from the old format to # the new one. SubStorage.get should be updated once all other # *_get methods use the new format. # See: https://forge.softwareheritage.org/T1433 res = {} for d in self._licenses.get(ids): res.setdefault(d.pop('id'), []).append(d) for (id_, facts) in res.items(): yield {id_: facts} def content_fossology_license_add(self, licenses, conflict_update=False): - """Add licenses not present in storage. - - Args: - licenses (iterable): dictionaries with keys: - - - **id**: sha1 - - **licenses** ([bytes]): List of licenses associated to sha1 - - **tool** (str): nomossa - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - Returns: - list: content_license entries which failed due to unknown licenses - - """ if not all(isinstance(x['id'], bytes) for x in licenses): raise TypeError('identifiers must be bytes.') self._licenses.add_merge(licenses, conflict_update, 'licenses') def content_fossology_license_get_range( self, start, end, indexer_configuration_id, limit=1000): - """Retrieve licenses within range [start, end] bound by limit. 
- - Args: - **start** (bytes): Starting identifier range (expected smaller - than end) - **end** (bytes): Ending identifier range (expected larger - than start) - **indexer_configuration_id** (int): The tool used to index data - **limit** (int): Limit result (default to 1000) - - Raises: - ValueError for limit to None - - Returns: - a dict with keys: - - **ids** [bytes]: iterable of content ids within the range. - - **next** (Optional[bytes]): The next range of sha1 starts at - this sha1 if any - - """ return self._licenses.get_range( start, end, indexer_configuration_id, limit) def content_metadata_missing(self, metadata): - """List metadata missing from storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - missing sha1s - - """ yield from self._content_metadata.missing(metadata) def content_metadata_get(self, ids): - """Retrieve metadata per id. - - Args: - ids (iterable): sha1 checksums - - Yields: - dictionaries with the following keys: - - - **id** (bytes) - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - """ yield from self._content_metadata.get(ids) def content_metadata_add(self, metadata, conflict_update=False): - """Add metadata not present in storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id**: sha1 - - **metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute the - results - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - """ if not all(isinstance(x['id'], bytes) for x in metadata): raise TypeError('identifiers must be bytes.') self._content_metadata.add(metadata, conflict_update) def revision_intrinsic_metadata_missing(self, metadata): - """List metadata missing from storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id** (bytes): sha1_git revision identifier - - **indexer_configuration_id** (int): tool used to compute - the results - - Yields: - missing ids - - """ yield from self._revision_intrinsic_metadata.missing(metadata) def revision_intrinsic_metadata_get(self, ids): - """Retrieve revision metadata per id. - - Args: - ids (iterable): sha1 checksums - - Yields: - dictionaries with the following keys: - - - **id** (bytes) - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ yield from self._revision_intrinsic_metadata.get(ids) def revision_intrinsic_metadata_add(self, metadata, conflict_update=False): - """Add metadata not present in storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id**: sha1_git of revision - - **metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - """ if not all(isinstance(x['id'], bytes) for x in metadata): raise TypeError('identifiers must be bytes.') self._revision_intrinsic_metadata.add(metadata, conflict_update) def revision_intrinsic_metadata_delete(self, entries): - """Remove revision metadata from the storage. 
- - Args: - entries (dict): dictionaries with the following keys: - - **revision** (int): origin identifier - - **id** (int): tool used to compute metadata - """ self._revision_intrinsic_metadata.delete(entries) def origin_intrinsic_metadata_get(self, ids): - """Retrieve origin metadata per id. - - Args: - ids (iterable): origin identifiers - - Yields: - list: dictionaries with the following keys: - - - **id** (str): origin url - - **from_revision** (bytes): which revision this metadata - was extracted from - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ yield from self._origin_intrinsic_metadata.get(ids) def origin_intrinsic_metadata_add(self, metadata, conflict_update=False): - """Add origin metadata not present in storage. - - Args: - metadata (iterable): dictionaries with keys: - - - **id**: origin url - - **from_revision**: sha1 id of the revision used to generate - these metadata. - - **metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false, the default) - - """ self._origin_intrinsic_metadata.add(metadata, conflict_update) def origin_intrinsic_metadata_delete(self, entries): - """Remove origin metadata from the storage. - - Args: - entries (dict): dictionaries with the following keys: - - - **id** (str): origin url - - **indexer_configuration_id** (int): tool used to compute - metadata - """ self._origin_intrinsic_metadata.delete(entries) def origin_intrinsic_metadata_search_fulltext( self, conjunction, limit=100): - """Returns the list of origins whose metadata contain all the terms. - - Args: - conjunction (List[str]): List of terms to be searched for. - limit (int): The maximum number of results to return - - Yields: - list: dictionaries with the following keys: - - - **id** (str): origin url - - **from_revision** (bytes): which revision this metadata - was extracted from - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ # A very crude fulltext search implementation, but that's enough # to work on English metadata tokens_re = re.compile('[a-zA-Z0-9]+') search_tokens = list(itertools.chain( *map(tokens_re.findall, conjunction))) def rank(data): # Tokenize the metadata text = json.dumps(data['metadata']) text_tokens = tokens_re.findall(text) text_token_occurences = Counter(text_tokens) # Count the number of occurrences of search tokens in the text score = 0 for search_token in search_tokens: if text_token_occurences[search_token] == 0: # Search token is not in the text. 
return 0 score += text_token_occurences[search_token] # Normalize according to the text's length return score / math.log(len(text_tokens)) results = [(rank(data), data) for data in self._origin_intrinsic_metadata.get_all()] results = [(rank_, data) for (rank_, data) in results if rank_ > 0] results.sort(key=operator.itemgetter(0), # Don't try to order 'data' reverse=True) for (rank_, result) in results[:limit]: yield result def origin_intrinsic_metadata_search_by_producer( self, page_token='', limit=100, ids_only=False, - mappings=None, tool_ids=None, - db=None, cur=None): - """Returns the list of origins whose metadata contain all the terms. - - Args: - page_token (str): Opaque token used for pagination. - limit (int): The maximum number of results to return - ids_only (bool): Determines whether only origin ids are returned - or the content as well - mappings (List[str]): Returns origins whose intrinsic metadata - were generated using at least one of these mappings. - - Returns: - dict: dict with the following keys: - - **next_page_token** (str, optional): opaque token to be used as - `page_token` for retrieveing the next page. - - **origins** (list): list of origin url (str) if `ids_only=True` - else dictionaries with the following keys: - - - **id** (str): origin urls - - **from_revision**: sha1 id of the revision used to generate - these metadata. - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata - - """ + mappings=None, tool_ids=None): assert isinstance(page_token, str) nb_results = 0 if mappings is not None: mappings = frozenset(mappings) if tool_ids is not None: tool_ids = frozenset(tool_ids) origins = [] # we go to limit+1 to check whether we should add next_page_token in # the response for entry in self._origin_intrinsic_metadata.get_all(): if entry['id'] <= page_token: continue if nb_results >= (limit + 1): break if mappings is not None and mappings.isdisjoint(entry['mappings']): continue if tool_ids is not None and entry['tool']['id'] not in tool_ids: continue origins.append(entry) nb_results += 1 result = {} if len(origins) > limit: origins = origins[:limit] result['next_page_token'] = origins[-1]['id'] if ids_only: origins = [origin['id'] for origin in origins] result['origins'] = origins return result def origin_intrinsic_metadata_stats(self): - """Returns statistics on stored intrinsic metadata. - - Returns: - dict: dictionary with keys: - - - total (int): total number of origins that were indexed - (possibly yielding an empty metadata dictionary) - - non_empty (int): total number of origins that we extracted - a non-empty metadata dictionary from - - per_mapping (dict): a dictionary with mapping names as - keys and number of origins whose indexing used this - mapping. Note that indexing a given origin may use - 0, 1, or many mappings. - """ mapping_count = {m: 0 for m in MAPPING_NAMES} total = non_empty = 0 for data in self._origin_intrinsic_metadata.get_all(): total += 1 if set(data['metadata']) - {'@context'}: non_empty += 1 for mapping in data['mappings']: mapping_count[mapping] += 1 return { 'per_mapping': mapping_count, 'total': total, 'non_empty': non_empty } def indexer_configuration_add(self, tools): - """Add new tools to the storage. - - Args: - tools ([dict]): List of dictionary representing tool to - insert in the db. 
Dictionary with the following keys: - - - **tool_name** (str): tool's name - - **tool_version** (str): tool's version - - **tool_configuration** (dict): tool's configuration - (free form dict) - - Returns: - list: List of dict inserted in the db (holding the id key as - well). The order of the list is not guaranteed to match - the order of the initial list. - - """ inserted = [] for tool in tools: tool = tool.copy() id_ = self._tool_key(tool) tool['id'] = id_ self._tools[id_] = tool inserted.append(tool) return inserted def indexer_configuration_get(self, tool): - """Retrieve tool information. - - Args: - tool (dict): Dictionary representing a tool with the - following keys: - - - **tool_name** (str): tool's name - - **tool_version** (str): tool's version - - **tool_configuration** (dict): tool's configuration - (free form dict) - - Returns: - The same dictionary with an `id` key, None otherwise. - - """ return self._tools.get(self._tool_key(tool)) def _tool_key(self, tool): return hash((tool['tool_name'], tool['tool_version'], json.dumps(tool['tool_configuration'], sort_keys=True))) diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/interface.py similarity index 54% copy from swh/indexer/storage/__init__.py copy to swh/indexer/storage/interface.py index 4eaa7d5..a884f1c 100644 --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/interface.py @@ -1,938 +1,600 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import json -import psycopg2 - -from collections import defaultdict - from swh.core.api import remote_api_endpoint -from swh.storage.common import db_transaction_generator, db_transaction -from swh.storage.exc import StorageDBError -from .db import Db - -from . import converters - - -INDEXER_CFG_KEY = 'indexer_storage' - - -MAPPING_NAMES = ['codemeta', 'gemspec', 'maven', 'npm', 'pkg-info'] - - -def get_indexer_storage(cls, args): - """Get an indexer storage object of class `storage_class` with - arguments `storage_args`. - - Args: - cls (str): storage's class, either 'local' or 'remote' - args (dict): dictionary of arguments passed to the - storage class constructor - - Returns: - an instance of swh.indexer's storage (either local or remote) - - Raises: - ValueError if passed an unknown storage class. - - """ - if cls == 'remote': - from .api.client import RemoteStorage as IndexerStorage - elif cls == 'local': - from . import IndexerStorage - elif cls == 'memory': - from .in_memory import IndexerStorage - else: - raise ValueError('Unknown indexer storage class `%s`' % cls) - - return IndexerStorage(**args) - - -def _check_id_duplicates(data): - """ - If any two dictionaries in `data` have the same id, raises - a `ValueError`. - - Values associated to the key must be hashable. - - Args: - data (List[dict]): List of dictionaries to be inserted - - >>> _check_id_duplicates([ - ... {'id': 'foo', 'data': 'spam'}, - ... {'id': 'bar', 'data': 'egg'}, - ... ]) - >>> _check_id_duplicates([ - ... {'id': 'foo', 'data': 'spam'}, - ... {'id': 'foo', 'data': 'egg'}, - ... ]) - Traceback (most recent call last): - ... - ValueError: The same id is present more than once. 
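# Illustrative sketch of tool deduplication: indexer_configuration_get only
# returns a registered tool (with its 'id') for an exact match on name,
# version and configuration, which is what _tool_key above encodes.
# `storage` and `tool` are assumed from the first sketch.
same_tool = storage.indexer_configuration_get({
    'tool_name': 'file',
    'tool_version': '5.22',
    'tool_configuration': {'command_line': 'file --mime <filepath>'},
})
assert same_tool is not None and same_tool['id'] == tool['id']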
- """ - if len({item['id'] for item in data}) < len(data): - raise ValueError('The same id is present more than once.') -class IndexerStorage: - """SWH Indexer Storage - - """ - def __init__(self, db, min_pool_conns=1, max_pool_conns=10): - """ - Args: - db_conn: either a libpq connection string, or a psycopg2 connection - - """ - try: - if isinstance(db, psycopg2.extensions.connection): - self._pool = None - self._db = Db(db) - else: - self._pool = psycopg2.pool.ThreadedConnectionPool( - min_pool_conns, max_pool_conns, db - ) - self._db = None - except psycopg2.OperationalError as e: - raise StorageDBError(e) - - def get_db(self): - if self._db: - return self._db - return Db.from_pool(self._pool) - - def put_db(self, db): - if db is not self._db: - db.put_conn() - +class IndexerStorageInterface: @remote_api_endpoint('check_config') - @db_transaction() - def check_config(self, *, check_write, db=None, cur=None): + def check_config(self, *, check_write): """Check that the storage is configured and ready to go.""" - # Check permissions on one of the tables - if check_write: - check = 'INSERT' - else: - check = 'SELECT' - - cur.execute( - "select has_table_privilege(current_user, 'content_mimetype', %s)", # noqa - (check,) - ) - return cur.fetchone()[0] + ... @remote_api_endpoint('content_mimetype/missing') - @db_transaction_generator() - def content_mimetype_missing(self, mimetypes, db=None, cur=None): + def content_mimetype_missing(self, mimetypes): """Generate mimetypes missing from storage. Args: mimetypes (iterable): iterable of dict with keys: - **id** (bytes): sha1 identifier - **indexer_configuration_id** (int): tool used to compute the results Yields: tuple (id, indexer_configuration_id): missing id """ - for obj in db.content_mimetype_missing_from_list(mimetypes, cur): - yield obj[0] + ... def _content_get_range(self, content_type, start, end, indexer_configuration_id, limit=1000, - with_textual_data=False, - db=None, cur=None): + with_textual_data=False): """Retrieve ids of type content_type within range [start, end] bound by limit. Args: **content_type** (str): content's type (mimetype, language, etc...) **start** (bytes): Starting identifier range (expected smaller than end) **end** (bytes): Ending identifier range (expected larger than start) **indexer_configuration_id** (int): The tool used to index data **limit** (int): Limit result (default to 1000) **with_textual_data** (bool): Deal with only textual content (True) or all content (all contents by defaults, False) Raises: ValueError for; - limit to None - wrong content_type provided Returns: a dict with keys: - **ids** [bytes]: iterable of content ids within the range. - **next** (Optional[bytes]): The next range of sha1 starts at this sha1 if any """ - if limit is None: - raise ValueError('Development error: limit should not be None') - if content_type not in db.content_indexer_names: - err = 'Development error: Wrong type. Should be one of [%s]' % ( - ','.join(db.content_indexer_names)) - raise ValueError(err) - - ids = [] - next_id = None - for counter, obj in enumerate(db.content_get_range( - content_type, start, end, indexer_configuration_id, - limit=limit+1, with_textual_data=with_textual_data, cur=cur)): - _id = obj[0] - if counter >= limit: - next_id = _id - break - - ids.append(_id) - - return { - 'ids': ids, - 'next': next_id - } + ... 
@remote_api_endpoint('content_mimetype/range') - @db_transaction() def content_mimetype_get_range(self, start, end, indexer_configuration_id, - limit=1000, db=None, cur=None): + limit=1000): """Retrieve mimetypes within range [start, end] bound by limit. Args: **start** (bytes): Starting identifier range (expected smaller than end) **end** (bytes): Ending identifier range (expected larger than start) **indexer_configuration_id** (int): The tool used to index data **limit** (int): Limit result (default to 1000) Raises: ValueError for limit to None Returns: a dict with keys: - **ids** [bytes]: iterable of content ids within the range. - **next** (Optional[bytes]): The next range of sha1 starts at this sha1 if any """ - return self._content_get_range('mimetype', start, end, - indexer_configuration_id, limit=limit, - db=db, cur=cur) + ... @remote_api_endpoint('content_mimetype/add') - @db_transaction() - def content_mimetype_add(self, mimetypes, conflict_update=False, db=None, - cur=None): + def content_mimetype_add(self, mimetypes, conflict_update=False): """Add mimetypes not present in storage. Args: mimetypes (iterable): dictionaries with keys: - **id** (bytes): sha1 identifier - **mimetype** (bytes): raw content's mimetype - **encoding** (bytes): raw content's encoding - **indexer_configuration_id** (int): tool's id used to compute the results - **conflict_update** (bool): Flag to determine if we want to overwrite (``True``) or skip duplicates (``False``, the default) """ - _check_id_duplicates(mimetypes) - mimetypes.sort(key=lambda m: m['id']) - db.mktemp_content_mimetype(cur) - db.copy_to(mimetypes, 'tmp_content_mimetype', - ['id', 'mimetype', 'encoding', 'indexer_configuration_id'], - cur) - db.content_mimetype_add_from_temp(conflict_update, cur) + ... @remote_api_endpoint('content_mimetype') - @db_transaction_generator() - def content_mimetype_get(self, ids, db=None, cur=None): + def content_mimetype_get(self, ids): """Retrieve full content mimetype per ids. Args: ids (iterable): sha1 identifier Yields: mimetypes (iterable): dictionaries with keys: - **id** (bytes): sha1 identifier - **mimetype** (bytes): raw content's mimetype - **encoding** (bytes): raw content's encoding - **tool** (dict): Tool used to compute the language """ - for c in db.content_mimetype_get_from_list(ids, cur): - yield converters.db_to_mimetype( - dict(zip(db.content_mimetype_cols, c))) + ... @remote_api_endpoint('content_language/missing') - @db_transaction_generator() - def content_language_missing(self, languages, db=None, cur=None): + def content_language_missing(self, languages): """List languages missing from storage. Args: languages (iterable): dictionaries with keys: - **id** (bytes): sha1 identifier - **indexer_configuration_id** (int): tool used to compute the results Yields: an iterable of missing id for the tuple (id, indexer_configuration_id) """ - for obj in db.content_language_missing_from_list(languages, cur): - yield obj[0] + ... @remote_api_endpoint('content_language') - @db_transaction_generator() - def content_language_get(self, ids, db=None, cur=None): + def content_language_get(self, ids): """Retrieve full content language per ids. Args: ids (iterable): sha1 identifier Yields: languages (iterable): dictionaries with keys: - **id** (bytes): sha1 identifier - **lang** (bytes): raw content's language - **tool** (dict): Tool used to compute the language """ - for c in db.content_language_get_from_list(ids, cur): - yield converters.db_to_language( - dict(zip(db.content_language_cols, c))) + ... 
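# Sketch of a caller walking content_mimetype_get_range with the documented
# {'ids', 'next'} contract: 'next' is the sha1 the following page starts at,
# or None once the range is exhausted. storage, start, end and tool_id are
# assumed to be supplied by the caller.
def iter_mimetype_ids(storage, start, end, tool_id, batch_size=1000):
    while start is not None:
        page = storage.content_mimetype_get_range(
            start, end, indexer_configuration_id=tool_id, limit=batch_size)
        yield from page['ids']
        start = page['next']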
@remote_api_endpoint('content_language/add') - @db_transaction() - def content_language_add(self, languages, conflict_update=False, db=None, - cur=None): + def content_language_add(self, languages, conflict_update=False): """Add languages not present in storage. Args: languages (iterable): dictionaries with keys: - **id** (bytes): sha1 - **lang** (bytes): language detected conflict_update (bool): Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ - _check_id_duplicates(languages) - languages.sort(key=lambda m: m['id']) - db.mktemp_content_language(cur) - # empty language is mapped to 'unknown' - db.copy_to( - ({ - 'id': l['id'], - 'lang': 'unknown' if not l['lang'] else l['lang'], - 'indexer_configuration_id': l['indexer_configuration_id'], - } for l in languages), - 'tmp_content_language', - ['id', 'lang', 'indexer_configuration_id'], cur) - - db.content_language_add_from_temp(conflict_update, cur) + ... @remote_api_endpoint('content/ctags/missing') - @db_transaction_generator() - def content_ctags_missing(self, ctags, db=None, cur=None): + def content_ctags_missing(self, ctags): """List ctags missing from storage. Args: ctags (iterable): dicts with keys: - **id** (bytes): sha1 identifier - **indexer_configuration_id** (int): tool used to compute the results Yields: an iterable of missing id for the tuple (id, indexer_configuration_id) """ - for obj in db.content_ctags_missing_from_list(ctags, cur): - yield obj[0] + ... @remote_api_endpoint('content/ctags') - @db_transaction_generator() - def content_ctags_get(self, ids, db=None, cur=None): + def content_ctags_get(self, ids): """Retrieve ctags per id. Args: ids (iterable): sha1 checksums Yields: Dictionaries with keys: - **id** (bytes): content's identifier - **name** (str): symbol's name - **kind** (str): symbol's kind - **lang** (str): language for that content - **tool** (dict): tool used to compute the ctags' info """ - for c in db.content_ctags_get_from_list(ids, cur): - yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c))) + ... @remote_api_endpoint('content/ctags/add') - @db_transaction() - def content_ctags_add(self, ctags, conflict_update=False, db=None, - cur=None): + def content_ctags_add(self, ctags, conflict_update=False): """Add ctags not present in storage Args: ctags (iterable): dictionaries with keys: - **id** (bytes): sha1 - **ctags** ([list): List of dictionary with keys: name, kind, line, lang """ - _check_id_duplicates(ctags) - ctags.sort(key=lambda m: m['id']) - - def _convert_ctags(__ctags): - """Convert ctags dict to list of ctags. - - """ - for ctags in __ctags: - yield from converters.ctags_to_db(ctags) - - db.mktemp_content_ctags(cur) - db.copy_to(list(_convert_ctags(ctags)), - tblname='tmp_content_ctags', - columns=['id', 'name', 'kind', 'line', - 'lang', 'indexer_configuration_id'], - cur=cur) - - db.content_ctags_add_from_temp(conflict_update, cur) + ... @remote_api_endpoint('content/ctags/search') - @db_transaction_generator() def content_ctags_search(self, expression, - limit=10, last_sha1=None, db=None, cur=None): + limit=10, last_sha1=None): """Search through content's raw ctags symbols. Args: expression (str): Expression to search for limit (int): Number of rows to return (default to 10). last_sha1 (str): Offset from which retrieving data (default to ''). Yields: rows of ctags including id, name, lang, kind, line, etc... 
""" - for obj in db.content_ctags_search(expression, last_sha1, limit, - cur=cur): - yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj))) + ... @remote_api_endpoint('content/fossology_license') - @db_transaction_generator() - def content_fossology_license_get(self, ids, db=None, cur=None): + def content_fossology_license_get(self, ids): """Retrieve licenses per id. Args: ids (iterable): sha1 checksums Yields: dict: ``{id: facts}`` where ``facts`` is a dict with the following keys: - **licenses** ([str]): associated licenses for that content - **tool** (dict): Tool used to compute the license """ - d = defaultdict(list) - for c in db.content_fossology_license_get_from_list(ids, cur): - license = dict(zip(db.content_fossology_license_cols, c)) - - id_ = license['id'] - d[id_].append(converters.db_to_fossology_license(license)) - - for id_, facts in d.items(): - yield {id_: facts} + ... @remote_api_endpoint('content/fossology_license/add') - @db_transaction() - def content_fossology_license_add(self, licenses, conflict_update=False, - db=None, cur=None): + def content_fossology_license_add(self, licenses, conflict_update=False): """Add licenses not present in storage. Args: licenses (iterable): dictionaries with keys: - **id**: sha1 - **licenses** ([bytes]): List of licenses associated to sha1 - **tool** (str): nomossa conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) Returns: list: content_license entries which failed due to unknown licenses """ - _check_id_duplicates(licenses) - licenses.sort(key=lambda m: m['id']) - db.mktemp_content_fossology_license(cur) - db.copy_to( - ({ - 'id': sha1['id'], - 'indexer_configuration_id': sha1['indexer_configuration_id'], - 'license': license, - } for sha1 in licenses - for license in sha1['licenses']), - tblname='tmp_content_fossology_license', - columns=['id', 'license', 'indexer_configuration_id'], - cur=cur) - db.content_fossology_license_add_from_temp(conflict_update, cur) + ... @remote_api_endpoint('content/fossology_license/range') - @db_transaction() def content_fossology_license_get_range( self, start, end, indexer_configuration_id, - limit=1000, db=None, cur=None): + limit=1000): """Retrieve licenses within range [start, end] bound by limit. Args: **start** (bytes): Starting identifier range (expected smaller than end) **end** (bytes): Ending identifier range (expected larger than start) **indexer_configuration_id** (int): The tool used to index data **limit** (int): Limit result (default to 1000) Raises: ValueError for limit to None Returns: a dict with keys: - **ids** [bytes]: iterable of content ids within the range. - **next** (Optional[bytes]): The next range of sha1 starts at this sha1 if any """ - return self._content_get_range('fossology_license', start, end, - indexer_configuration_id, limit=limit, - with_textual_data=True, db=db, cur=cur) + ... @remote_api_endpoint('content_metadata/missing') - @db_transaction_generator() - def content_metadata_missing(self, metadata, db=None, cur=None): + def content_metadata_missing(self, metadata): """List metadata missing from storage. Args: metadata (iterable): dictionaries with keys: - **id** (bytes): sha1 identifier - **indexer_configuration_id** (int): tool used to compute the results Yields: missing sha1s """ - for obj in db.content_metadata_missing_from_list(metadata, cur): - yield obj[0] + ... 
@remote_api_endpoint('content_metadata') - @db_transaction_generator() - def content_metadata_get(self, ids, db=None, cur=None): + def content_metadata_get(self, ids): """Retrieve metadata per id. Args: ids (iterable): sha1 checksums Yields: dictionaries with the following keys: id (bytes) metadata (str): associated metadata tool (dict): tool used to compute metadata """ - for c in db.content_metadata_get_from_list(ids, cur): - yield converters.db_to_metadata( - dict(zip(db.content_metadata_cols, c))) + ... @remote_api_endpoint('content_metadata/add') - @db_transaction() - def content_metadata_add(self, metadata, conflict_update=False, db=None, - cur=None): + def content_metadata_add(self, metadata, conflict_update=False): """Add metadata not present in storage. Args: metadata (iterable): dictionaries with keys: - **id**: sha1 - **metadata**: arbitrary dict conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ - _check_id_duplicates(metadata) - metadata.sort(key=lambda m: m['id']) - - db.mktemp_content_metadata(cur) - - db.copy_to(metadata, 'tmp_content_metadata', - ['id', 'metadata', 'indexer_configuration_id'], - cur) - db.content_metadata_add_from_temp(conflict_update, cur) + ... @remote_api_endpoint('revision_intrinsic_metadata/missing') - @db_transaction_generator() - def revision_intrinsic_metadata_missing(self, metadata, db=None, cur=None): + def revision_intrinsic_metadata_missing(self, metadata): """List metadata missing from storage. Args: metadata (iterable): dictionaries with keys: - **id** (bytes): sha1_git revision identifier - **indexer_configuration_id** (int): tool used to compute the results Yields: missing ids """ - for obj in db.revision_intrinsic_metadata_missing_from_list( - metadata, cur): - yield obj[0] + ... @remote_api_endpoint('revision_intrinsic_metadata') - @db_transaction_generator() - def revision_intrinsic_metadata_get(self, ids, db=None, cur=None): + def revision_intrinsic_metadata_get(self, ids): """Retrieve revision metadata per id. Args: ids (iterable): sha1 checksums Yields: : dictionaries with the following keys: - **id** (bytes) - **metadata** (str): associated metadata - **tool** (dict): tool used to compute metadata - **mappings** (List[str]): list of mappings used to translate these metadata """ - for c in db.revision_intrinsic_metadata_get_from_list(ids, cur): - yield converters.db_to_metadata( - dict(zip(db.revision_intrinsic_metadata_cols, c))) + ... @remote_api_endpoint('revision_intrinsic_metadata/add') - @db_transaction() - def revision_intrinsic_metadata_add(self, metadata, conflict_update=False, - db=None, cur=None): + def revision_intrinsic_metadata_add(self, metadata, conflict_update=False): """Add metadata not present in storage. Args: metadata (iterable): dictionaries with keys: - **id**: sha1_git of revision - **metadata**: arbitrary dict - **indexer_configuration_id**: tool used to compute metadata - **mappings** (List[str]): list of mappings used to translate these metadata conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ - _check_id_duplicates(metadata) - metadata.sort(key=lambda m: m['id']) - - db.mktemp_revision_intrinsic_metadata(cur) - - db.copy_to(metadata, 'tmp_revision_intrinsic_metadata', - ['id', 'metadata', 'mappings', - 'indexer_configuration_id'], - cur) - db.revision_intrinsic_metadata_add_from_temp(conflict_update, cur) + ... 
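# Sketch of the conflict_update flag shared by the *_add endpoints, following
# the docstring above: an existing entry for the same id is kept by default
# and only overwritten when conflict_update=True. storage, tool_id and rev_id
# are assumed to exist already.
entry_v1 = {'id': rev_id, 'metadata': {'name': 'v1'}, 'mappings': [],
            'indexer_configuration_id': tool_id}
entry_v2 = dict(entry_v1, metadata={'name': 'v2'})

storage.revision_intrinsic_metadata_add([entry_v1])
storage.revision_intrinsic_metadata_add([entry_v2])   # duplicate id: skipped
storage.revision_intrinsic_metadata_add([entry_v2], conflict_update=True)  # overwrites v1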
@remote_api_endpoint('revision_intrinsic_metadata/delete') - @db_transaction() - def revision_intrinsic_metadata_delete(self, entries, db=None, cur=None): + def revision_intrinsic_metadata_delete(self, entries): """Remove revision metadata from the storage. Args: entries (dict): dictionaries with the following keys: - **id** (bytes): revision identifier - **indexer_configuration_id** (int): tool used to compute metadata """ - db.revision_intrinsic_metadata_delete(entries, cur) + ... @remote_api_endpoint('origin_intrinsic_metadata') - @db_transaction_generator() - def origin_intrinsic_metadata_get(self, ids, db=None, cur=None): + def origin_intrinsic_metadata_get(self, ids): """Retrieve origin metadata per id. Args: ids (iterable): origin identifiers Yields: list: dictionaries with the following keys: - **id** (str): origin url - **from_revision** (bytes): which revision this metadata was extracted from - **metadata** (str): associated metadata - **tool** (dict): tool used to compute metadata - **mappings** (List[str]): list of mappings used to translate these metadata """ - for c in db.origin_intrinsic_metadata_get_from_list(ids, cur): - yield converters.db_to_metadata( - dict(zip(db.origin_intrinsic_metadata_cols, c))) + ... @remote_api_endpoint('origin_intrinsic_metadata/add') - @db_transaction() def origin_intrinsic_metadata_add(self, metadata, - conflict_update=False, db=None, - cur=None): + conflict_update=False): """Add origin metadata not present in storage. Args: metadata (iterable): dictionaries with keys: - **id**: origin urls - **from_revision**: sha1 id of the revision used to generate these metadata. - **metadata**: arbitrary dict - **indexer_configuration_id**: tool used to compute metadata - **mappings** (List[str]): list of mappings used to translate these metadata conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ - _check_id_duplicates(metadata) - metadata.sort(key=lambda m: m['id']) - - db.mktemp_origin_intrinsic_metadata(cur) - - db.copy_to(metadata, 'tmp_origin_intrinsic_metadata', - ['id', 'metadata', - 'indexer_configuration_id', - 'from_revision', 'mappings'], - cur) - db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur) + ... @remote_api_endpoint('origin_intrinsic_metadata/delete') - @db_transaction() def origin_intrinsic_metadata_delete( - self, entries, db=None, cur=None): + self, entries): """Remove origin metadata from the storage. Args: entries (dict): dictionaries with the following keys: - **id** (str): origin urls - **indexer_configuration_id** (int): tool used to compute metadata """ - db.origin_intrinsic_metadata_delete(entries, cur) + ... @remote_api_endpoint('origin_intrinsic_metadata/search/fulltext') - @db_transaction_generator() def origin_intrinsic_metadata_search_fulltext( - self, conjunction, limit=100, db=None, cur=None): + self, conjunction, limit=100): """Returns the list of origins whose metadata contain all the terms. Args: conjunction (List[str]): List of terms to be searched for. limit (int): The maximum number of results to return Yields: list: dictionaries with the following keys: - **id** (str): origin urls - **from_revision**: sha1 id of the revision used to generate these metadata. 
- **metadata** (str): associated metadata - **tool** (dict): tool used to compute metadata - **mappings** (List[str]): list of mappings used to translate these metadata """ - for c in db.origin_intrinsic_metadata_search_fulltext( - conjunction, limit=limit, cur=cur): - yield converters.db_to_metadata( - dict(zip(db.origin_intrinsic_metadata_cols, c))) + ... @remote_api_endpoint('origin_intrinsic_metadata/search/by_producer') - @db_transaction() def origin_intrinsic_metadata_search_by_producer( self, page_token='', limit=100, ids_only=False, - mappings=None, tool_ids=None, - db=None, cur=None): + mappings=None, tool_ids=None): """Returns the list of origins whose metadata contain all the terms. Args: page_token (str): Opaque token used for pagination. limit (int): The maximum number of results to return ids_only (bool): Determines whether only origin urls are returned or the content as well mappings (List[str]): Returns origins whose intrinsic metadata were generated using at least one of these mappings. Returns: dict: dict with the following keys: - **next_page_token** (str, optional): opaque token to be used as `page_token` for retrieving the next page. If absent, there is no more pages to gather. - **origins** (list): list of origin url (str) if `ids_only=True` else dictionaries with the following keys: - **id** (str): origin urls - **from_revision**: sha1 id of the revision used to generate these metadata. - **metadata** (str): associated metadata - **tool** (dict): tool used to compute metadata - **mappings** (List[str]): list of mappings used to translate these metadata """ - assert isinstance(page_token, str) - # we go to limit+1 to check whether we should add next_page_token in - # the response - res = db.origin_intrinsic_metadata_search_by_producer( - page_token, limit + 1, ids_only, mappings, tool_ids, cur) - result = {} - if ids_only: - result['origins'] = [origin for (origin,) in res] - if len(result['origins']) > limit: - result['origins'][limit:] = [] - result['next_page_token'] = result['origins'][-1] - else: - result['origins'] = [converters.db_to_metadata( - dict(zip(db.origin_intrinsic_metadata_cols, c)))for c in res] - if len(result['origins']) > limit: - result['origins'][limit:] = [] - result['next_page_token'] = result['origins'][-1]['id'] - return result + ... @remote_api_endpoint('origin_intrinsic_metadata/stats') - @db_transaction() def origin_intrinsic_metadata_stats( - self, db=None, cur=None): + self): """Returns counts of indexed metadata per origins, broken down into metadata types. Returns: dict: dictionary with keys: - total (int): total number of origins that were indexed (possibly yielding an empty metadata dictionary) - non_empty (int): total number of origins that we extracted a non-empty metadata dictionary from - per_mapping (dict): a dictionary with mapping names as keys and number of origins whose indexing used this mapping. Note that indexing a given origin may use 0, 1, or many mappings. 
""" - mapping_names = [m for m in MAPPING_NAMES] - select_parts = [] - - # Count rows for each mapping - for mapping_name in mapping_names: - select_parts.append(( - "sum(case when (mappings @> ARRAY['%s']) " - " then 1 else 0 end)" - ) % mapping_name) - - # Total - select_parts.append("sum(1)") - - # Rows whose metadata has at least one key that is not '@context' - select_parts.append( - "sum(case when ('{}'::jsonb @> (metadata - '@context')) " - " then 0 else 1 end)") - cur.execute('select ' + ', '.join(select_parts) - + ' from origin_intrinsic_metadata') - results = dict(zip(mapping_names + ['total', 'non_empty'], - cur.fetchone())) - return { - 'total': results.pop('total'), - 'non_empty': results.pop('non_empty'), - 'per_mapping': results, - } + ... @remote_api_endpoint('indexer_configuration/add') - @db_transaction_generator() - def indexer_configuration_add(self, tools, db=None, cur=None): + def indexer_configuration_add(self, tools): """Add new tools to the storage. Args: tools ([dict]): List of dictionary representing tool to insert in the db. Dictionary with the following keys: - **tool_name** (str): tool's name - **tool_version** (str): tool's version - **tool_configuration** (dict): tool's configuration (free form dict) Returns: List of dict inserted in the db (holding the id key as well). The order of the list is not guaranteed to match the order of the initial list. """ - db.mktemp_indexer_configuration(cur) - db.copy_to(tools, 'tmp_indexer_configuration', - ['tool_name', 'tool_version', 'tool_configuration'], - cur) - - tools = db.indexer_configuration_add_from_temp(cur) - for line in tools: - yield dict(zip(db.indexer_configuration_cols, line)) + ... @remote_api_endpoint('indexer_configuration/data') - @db_transaction() - def indexer_configuration_get(self, tool, db=None, cur=None): + def indexer_configuration_get(self, tool): """Retrieve tool information. Args: tool (dict): Dictionary representing a tool with the following keys: - **tool_name** (str): tool's name - **tool_version** (str): tool's version - **tool_configuration** (dict): tool's configuration (free form dict) Returns: The same dictionary with an `id` key, None otherwise. """ - tool_conf = tool['tool_configuration'] - if isinstance(tool_conf, dict): - tool_conf = json.dumps(tool_conf) - idx = db.indexer_configuration_get(tool['tool_name'], - tool['tool_version'], - tool_conf) - if not idx: - return None - return dict(zip(db.indexer_configuration_cols, idx)) + ... diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py index c6dbc94..3218dc2 100644 --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -1,1855 +1,1888 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import inspect import threading + import pytest + from swh.model.hashutil import hash_to_bytes +from swh.indexer.storage.interface import IndexerStorageInterface + def prepare_mimetypes_from(fossology_licenses): """Fossology license needs some consistent data in db to run. 
""" mimetypes = [] for c in fossology_licenses: mimetypes.append({ 'id': c['id'], 'mimetype': 'text/plain', 'encoding': 'utf-8', 'indexer_configuration_id': c['indexer_configuration_id'], }) return mimetypes def endpoint(storage, endpoint_type, endpoint_name): return getattr(storage, endpoint_type + '_' + endpoint_name) class StorageETypeTester: """Base class for testing a series of common behaviour between a bunch of endpoint types supported by an IndexerStorage. This is supposed to be inherited with the following class attributes: - endpoint_type - tool_name - example_data See below for example usage. """ def test_missing(self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool_id = data.tools[self.tool_name]['id'] # given 2 (hopefully) unknown objects query = [ { 'id': data.sha1_1, 'indexer_configuration_id': tool_id, }, { 'id': data.sha1_2, 'indexer_configuration_id': tool_id, }] # we expect these are both returned by the xxx_missing endpoint actual_missing = endpoint(storage, etype, 'missing')(query) assert list(actual_missing) == [ data.sha1_1, data.sha1_2, ] # now, when we add one of them endpoint(storage, etype, 'add')([{ 'id': data.sha1_2, **self.example_data[0], 'indexer_configuration_id': tool_id, }]) # we expect only the other one returned actual_missing = endpoint(storage, etype, 'missing')(query) assert list(actual_missing) == [data.sha1_1] def test_add__drop_duplicate(self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool_id = data.tools[self.tool_name]['id'] # add the first object data_v1 = { 'id': data.sha1_2, **self.example_data[0], 'indexer_configuration_id': tool_id, } endpoint(storage, etype, 'add')([data_v1]) # should be able to retrieve it actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) expected_data_v1 = [{ 'id': data.sha1_2, **self.example_data[0], 'tool': data.tools[self.tool_name], }] assert actual_data == expected_data_v1 # now if we add a modified version of the same object (same id) data_v2 = data_v1.copy() data_v2.update(self.example_data[1]) endpoint(storage, etype, 'add')([data_v2]) # we expect to retrieve the original data, not the modified one actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) assert actual_data == expected_data_v1 def test_add__update_in_place_duplicate( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool = data.tools[self.tool_name] data_v1 = { 'id': data.sha1_2, **self.example_data[0], 'indexer_configuration_id': tool['id'], } # given endpoint(storage, etype, 'add')([data_v1]) # when actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) expected_data_v1 = [{ 'id': data.sha1_2, **self.example_data[0], 'tool': tool, }] # then assert actual_data == expected_data_v1 # given data_v2 = data_v1.copy() data_v2.update(self.example_data[1]) endpoint(storage, etype, 'add')([data_v2], conflict_update=True) actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) expected_data_v2 = [{ 'id': data.sha1_2, **self.example_data[1], 'tool': tool, }] # data did change as the v2 was used to overwrite v1 assert actual_data == expected_data_v2 def test_add__update_in_place_deadlock( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool = data.tools[self.tool_name] hashes = [ hash_to_bytes( '34973274ccef6ab4dfaaf86599792fa9c3fe4{:03d}'.format(i)) for i in 
range(1000)] data_v1 = [ { 'id': hash_, **self.example_data[0], 'indexer_configuration_id': tool['id'], } for hash_ in hashes ] data_v2 = [ { 'id': hash_, **self.example_data[1], 'indexer_configuration_id': tool['id'], } for hash_ in hashes ] # Remove one item from each, so that both queries have to succeed for # all items to be in the DB. data_v2a = data_v2[1:] data_v2b = list(reversed(data_v2[0:-1])) # given endpoint(storage, etype, 'add')(data_v1) # when actual_data = list(endpoint(storage, etype, 'get')(hashes)) expected_data_v1 = [ { 'id': hash_, **self.example_data[0], 'tool': tool, } for hash_ in hashes ] # then assert actual_data == expected_data_v1 # given def f1(): endpoint(storage, etype, 'add')(data_v2a, conflict_update=True) def f2(): endpoint(storage, etype, 'add')(data_v2b, conflict_update=True) t1 = threading.Thread(target=f1) t2 = threading.Thread(target=f2) t2.start() t1.start() t1.join() t2.join() actual_data = sorted(endpoint(storage, etype, 'get')(hashes), key=lambda x: x['id']) expected_data_v2 = [ { 'id': hash_, **self.example_data[1], 'tool': tool, } for hash_ in hashes ] assert actual_data == expected_data_v2 def test_add__duplicate_twice(self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool = data.tools[self.tool_name] data_rev1 = { 'id': data.revision_id_2, **self.example_data[0], 'indexer_configuration_id': tool['id'] } data_rev2 = { 'id': data.revision_id_2, **self.example_data[1], 'indexer_configuration_id': tool['id'] } # when endpoint(storage, etype, 'add')([data_rev1]) with pytest.raises(ValueError): endpoint(storage, etype, 'add')( [data_rev2, data_rev2], conflict_update=True) # then actual_data = list(endpoint(storage, etype, 'get')( [data.revision_id_2, data.revision_id_1])) expected_data = [{ 'id': data.revision_id_2, **self.example_data[0], 'tool': tool, }] assert actual_data == expected_data def test_get(self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool = data.tools[self.tool_name] query = [data.sha1_2, data.sha1_1] data1 = { 'id': data.sha1_2, **self.example_data[0], 'indexer_configuration_id': tool['id'], } # when endpoint(storage, etype, 'add')([data1]) # then actual_data = list(endpoint(storage, etype, 'get')(query)) # then expected_data = [{ 'id': data.sha1_2, **self.example_data[0], 'tool': tool, }] assert actual_data == expected_data class TestIndexerStorageContentMimetypes(StorageETypeTester): """Test Indexer Storage content_mimetype related methods """ endpoint_type = 'content_mimetype' tool_name = 'file' example_data = [ { 'mimetype': 'text/plain', 'encoding': 'utf-8', }, { 'mimetype': 'text/html', 'encoding': 'us-ascii', }, ] def test_generate_content_mimetype_get_range_limit_none( self, swh_indexer_storage): """mimetype_get_range call with wrong limit input should fail""" storage = swh_indexer_storage with pytest.raises(ValueError) as e: storage.content_mimetype_get_range( start=None, end=None, indexer_configuration_id=None, limit=None) assert e.value.args == ( 'Development error: limit should not be None',) def test_generate_content_mimetype_get_range_no_limit( self, swh_indexer_storage_with_data): """mimetype_get_range returns mimetypes within range provided""" storage, data = swh_indexer_storage_with_data mimetypes = data.mimetypes # All ids from the db content_ids = sorted([c['id'] for c in mimetypes]) start = content_ids[0] end = content_ids[-1] # retrieve mimetypes tool_id = 
mimetypes[0]['indexer_configuration_id'] actual_result = storage.content_mimetype_get_range( start, end, indexer_configuration_id=tool_id) actual_ids = actual_result['ids'] actual_next = actual_result['next'] assert len(mimetypes) == len(actual_ids) assert actual_next is None assert content_ids == actual_ids def test_generate_content_mimetype_get_range_limit( self, swh_indexer_storage_with_data): """mimetype_get_range paginates results if limit exceeded""" storage, data = swh_indexer_storage_with_data indexer_configuration_id = data.tools['file']['id'] # input the list of sha1s we want from storage content_ids = sorted( [c['id'] for c in data.mimetypes]) mimetypes = list(storage.content_mimetype_get(content_ids)) assert len(mimetypes) == len(data.mimetypes) start = content_ids[0] end = content_ids[-1] # retrieve mimetypes limited to 10 results actual_result = storage.content_mimetype_get_range( start, end, indexer_configuration_id=indexer_configuration_id, limit=10) assert actual_result assert set(actual_result.keys()) == {'ids', 'next'} actual_ids = actual_result['ids'] actual_next = actual_result['next'] assert len(actual_ids) == 10 assert actual_next is not None assert actual_next == content_ids[10] expected_mimetypes = content_ids[:10] assert expected_mimetypes == actual_ids # retrieve next part actual_result = storage.content_mimetype_get_range( start=end, end=end, indexer_configuration_id=indexer_configuration_id) assert set(actual_result.keys()) == {'ids', 'next'} actual_ids = actual_result['ids'] actual_next = actual_result['next'] assert actual_next is None expected_mimetypes = [content_ids[-1]] assert expected_mimetypes == actual_ids class TestIndexerStorageContentLanguage(StorageETypeTester): """Test Indexer Storage content_language related methods """ endpoint_type = 'content_language' tool_name = 'pygments' example_data = [ { 'lang': 'haskell', }, { 'lang': 'common-lisp', }, ] class TestIndexerStorageContentCTags(StorageETypeTester): """Test Indexer Storage content_ctags related methods """ endpoint_type = 'content_ctags' tool_name = 'universal-ctags' example_data = [ { 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 119, 'lang': 'OCaml', }] }, { 'ctags': [ { 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Python', }, { 'name': 'main', 'kind': 'function', 'line': 119, 'lang': 'Python', }] }, ] # the following tests are disabled because CTAGS behaves differently @pytest.mark.skip def test_add__drop_duplicate(self): pass @pytest.mark.skip def test_add__update_in_place_duplicate(self): pass @pytest.mark.skip def test_add__update_in_place_deadlock(self): pass @pytest.mark.skip def test_add__duplicate_twice(self): pass @pytest.mark.skip def test_get(self): pass def test_content_ctags_search(self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # 1. given tool = data.tools['universal-ctags'] tool_id = tool['id'] ctag1 = { 'id': data.sha1_1, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', }, { 'name': 'counter', 'kind': 'variable', 'line': 119, 'lang': 'Python', }, { 'name': 'hello', 'kind': 'variable', 'line': 210, 'lang': 'Python', }, ] } ctag2 = { 'id': data.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', }, { 'name': 'result', 'kind': 'variable', 'line': 120, 'lang': 'C', }, ] } storage.content_ctags_add([ctag1, ctag2]) # 1. 
when actual_ctags = list(storage.content_ctags_search('hello', limit=1)) # 1. then assert actual_ctags == [ { 'id': ctag1['id'], 'tool': tool, 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', } ] # 2. when actual_ctags = list(storage.content_ctags_search( 'hello', limit=1, last_sha1=ctag1['id'])) # 2. then assert actual_ctags == [ { 'id': ctag2['id'], 'tool': tool, 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', } ] # 3. when actual_ctags = list(storage.content_ctags_search('hello')) # 3. then assert actual_ctags == [ { 'id': ctag1['id'], 'tool': tool, 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', }, { 'id': ctag1['id'], 'tool': tool, 'name': 'hello', 'kind': 'variable', 'line': 210, 'lang': 'Python', }, { 'id': ctag2['id'], 'tool': tool, 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', }, ] # 4. when actual_ctags = list(storage.content_ctags_search('counter')) # then assert actual_ctags == [{ 'id': ctag1['id'], 'tool': tool, 'name': 'counter', 'kind': 'variable', 'line': 119, 'lang': 'Python', }] # 5. when actual_ctags = list(storage.content_ctags_search('result', limit=1)) # then assert actual_ctags == [{ 'id': ctag2['id'], 'tool': tool, 'name': 'result', 'kind': 'variable', 'line': 120, 'lang': 'C', }] def test_content_ctags_search_no_result(self, swh_indexer_storage): storage = swh_indexer_storage actual_ctags = list(storage.content_ctags_search('counter')) assert not actual_ctags def test_content_ctags_add__add_new_ctags_added( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool = data.tools['universal-ctags'] tool_id = tool['id'] ctag_v1 = { 'id': data.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }] } # given storage.content_ctags_add([ctag_v1]) storage.content_ctags_add([ctag_v1]) # conflict does nothing # when actual_ctags = list(storage.content_ctags_get([data.sha1_2])) # then expected_ctags = [{ 'id': data.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }] assert actual_ctags == expected_ctags # given ctag_v2 = ctag_v1.copy() ctag_v2.update({ 'ctags': [ { 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', } ] }) storage.content_ctags_add([ctag_v2]) expected_ctags = [ { 'id': data.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }, { 'id': data.sha1_2, 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', 'tool': tool, } ] actual_ctags = list(storage.content_ctags_get( [data.sha1_2])) assert actual_ctags == expected_ctags def test_content_ctags_add__update_in_place( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool = data.tools['universal-ctags'] tool_id = tool['id'] ctag_v1 = { 'id': data.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }] } # given storage.content_ctags_add([ctag_v1]) # when actual_ctags = list(storage.content_ctags_get( [data.sha1_2])) # then expected_ctags = [ { 'id': data.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool } ] assert actual_ctags == expected_ctags # given ctag_v2 = ctag_v1.copy() ctag_v2.update({ 'ctags': [ { 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }, { 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', } ] }) storage.content_ctags_add([ctag_v2], 
conflict_update=True) actual_ctags = list(storage.content_ctags_get( [data.sha1_2])) # ctag did change as the v2 was used to overwrite v1 expected_ctags = [ { 'id': data.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }, { 'id': data.sha1_2, 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', 'tool': tool, } ] assert actual_ctags == expected_ctags class TestIndexerStorageContentMetadata(StorageETypeTester): """Test Indexer Storage content_metadata related methods """ tool_name = 'swh-metadata-detector' endpoint_type = 'content_metadata' example_data = [ { 'metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, }, { 'metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, }, ] class TestIndexerStorageRevisionIntrinsicMetadata(StorageETypeTester): """Test Indexer Storage revision_intrinsic_metadata related methods """ tool_name = 'swh-metadata-detector' endpoint_type = 'revision_intrinsic_metadata' example_data = [ { 'metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'mappings': ['mapping1'], }, { 'metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'mappings': ['mapping2'], }, ] def test_revision_intrinsic_metadata_delete( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool = data.tools[self.tool_name] query = [data.sha1_2, data.sha1_1] data1 = { 'id': data.sha1_2, **self.example_data[0], 'indexer_configuration_id': tool['id'], } # when endpoint(storage, etype, 'add')([data1]) endpoint(storage, etype, 'delete')([ { 'id': data.sha1_2, 'indexer_configuration_id': tool['id'], } ]) # then actual_data = list(endpoint(storage, etype, 'get')(query)) # then assert not actual_data def test_revision_intrinsic_metadata_delete_nonexisting( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data etype = self.endpoint_type tool = data.tools[self.tool_name] endpoint(storage, etype, 'delete')([ { 'id': data.sha1_2, 'indexer_configuration_id': tool['id'], } ]) class TestIndexerStorageContentFossologyLicence: def test_content_fossology_license_add__new_license_added( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool = data.tools['nomos'] tool_id = tool['id'] license_v1 = { 'id': data.sha1_1, 'licenses': ['Apache-2.0'], 'indexer_configuration_id': tool_id, } # given storage.content_fossology_license_add([license_v1]) # conflict does nothing storage.content_fossology_license_add([license_v1]) # when actual_licenses = list(storage.content_fossology_license_get( [data.sha1_1])) # then expected_license = { data.sha1_1: [{ 'licenses': ['Apache-2.0'], 'tool': tool, }] } assert actual_licenses == [expected_license] # given license_v2 = license_v1.copy() license_v2.update({ 'licenses': ['BSD-2-Clause'], }) storage.content_fossology_license_add([license_v2]) actual_licenses = list(storage.content_fossology_license_get( [data.sha1_1])) expected_license = { data.sha1_1: [{ 'licenses': ['Apache-2.0', 'BSD-2-Clause'], 'tool': tool }] } # license did not change as the v2 was dropped. 
assert actual_licenses == [expected_license] def test_generate_content_fossology_license_get_range_limit_none( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data """license_get_range call with wrong limit input should fail""" with pytest.raises(ValueError) as e: storage.content_fossology_license_get_range( start=None, end=None, indexer_configuration_id=None, limit=None) assert e.value.args == ( 'Development error: limit should not be None',) def test_generate_content_fossology_license_get_range_no_limit( self, swh_indexer_storage_with_data): """license_get_range returns licenses within range provided""" storage, data = swh_indexer_storage_with_data # craft some consistent mimetypes fossology_licenses = data.fossology_licenses mimetypes = prepare_mimetypes_from(fossology_licenses) storage.content_mimetype_add(mimetypes, conflict_update=True) # add fossology_licenses to storage storage.content_fossology_license_add(fossology_licenses) # All ids from the db content_ids = sorted([c['id'] for c in fossology_licenses]) start = content_ids[0] end = content_ids[-1] # retrieve fossology_licenses tool_id = fossology_licenses[0]['indexer_configuration_id'] actual_result = storage.content_fossology_license_get_range( start, end, indexer_configuration_id=tool_id) actual_ids = actual_result['ids'] actual_next = actual_result['next'] assert len(fossology_licenses) == len(actual_ids) assert actual_next is None assert content_ids == actual_ids def test_generate_content_fossology_license_get_range_no_limit_with_filter( self, swh_indexer_storage_with_data): """This filters non textual, then returns results within range""" storage, data = swh_indexer_storage_with_data fossology_licenses = data.fossology_licenses mimetypes = data.mimetypes # craft some consistent mimetypes _mimetypes = prepare_mimetypes_from(fossology_licenses) # add binary mimetypes which will get filtered out in results for m in mimetypes: _mimetypes.append({ 'mimetype': 'binary', **m, }) storage.content_mimetype_add(_mimetypes, conflict_update=True) # add fossology_licenses to storage storage.content_fossology_license_add(fossology_licenses) # All ids from the db content_ids = sorted([c['id'] for c in fossology_licenses]) start = content_ids[0] end = content_ids[-1] # retrieve fossology_licenses tool_id = fossology_licenses[0]['indexer_configuration_id'] actual_result = storage.content_fossology_license_get_range( start, end, indexer_configuration_id=tool_id) actual_ids = actual_result['ids'] actual_next = actual_result['next'] assert len(fossology_licenses) == len(actual_ids) assert actual_next is None assert content_ids == actual_ids def test_generate_fossology_license_get_range_limit( self, swh_indexer_storage_with_data): """fossology_license_get_range paginates results if limit exceeded""" storage, data = swh_indexer_storage_with_data fossology_licenses = data.fossology_licenses # craft some consistent mimetypes mimetypes = prepare_mimetypes_from(fossology_licenses) # add fossology_licenses to storage storage.content_mimetype_add(mimetypes, conflict_update=True) storage.content_fossology_license_add(fossology_licenses) # input the list of sha1s we want from storage content_ids = sorted([c['id'] for c in fossology_licenses]) start = content_ids[0] end = content_ids[-1] # retrieve fossology_licenses limited to 3 results limited_results = len(fossology_licenses) - 1 tool_id = fossology_licenses[0]['indexer_configuration_id'] actual_result = storage.content_fossology_license_get_range( start, end, 
indexer_configuration_id=tool_id, limit=limited_results) actual_ids = actual_result['ids'] actual_next = actual_result['next'] assert limited_results == len(actual_ids) assert actual_next is not None assert actual_next == content_ids[-1] expected_fossology_licenses = content_ids[:-1] assert expected_fossology_licenses == actual_ids # retrieve next part actual_results2 = storage.content_fossology_license_get_range( start=end, end=end, indexer_configuration_id=tool_id) actual_ids2 = actual_results2['ids'] actual_next2 = actual_results2['next'] assert actual_next2 is None expected_fossology_licenses2 = [content_ids[-1]] assert expected_fossology_licenses2 == actual_ids2 class TestIndexerStorageOriginIntrinsicMetadata: def test_origin_intrinsic_metadata_get( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] metadata = { 'version': None, 'name': None, } metadata_rev = { 'id': data.revision_id_2, 'metadata': metadata, 'mappings': ['mapping1'], 'indexer_configuration_id': tool_id, } metadata_origin = { 'id': data.origin_url_1, 'metadata': metadata, 'indexer_configuration_id': tool_id, 'mappings': ['mapping1'], 'from_revision': data.revision_id_2, } # when storage.revision_intrinsic_metadata_add([metadata_rev]) storage.origin_intrinsic_metadata_add([metadata_origin]) # then actual_metadata = list(storage.origin_intrinsic_metadata_get( [data.origin_url_1, 'no://where'])) expected_metadata = [{ 'id': data.origin_url_1, 'metadata': metadata, 'tool': data.tools['swh-metadata-detector'], 'from_revision': data.revision_id_2, 'mappings': ['mapping1'], }] assert actual_metadata == expected_metadata def test_origin_intrinsic_metadata_delete( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] metadata = { 'version': None, 'name': None, } metadata_rev = { 'id': data.revision_id_2, 'metadata': metadata, 'mappings': ['mapping1'], 'indexer_configuration_id': tool_id, } metadata_origin = { 'id': data.origin_url_1, 'metadata': metadata, 'indexer_configuration_id': tool_id, 'mappings': ['mapping1'], 'from_revision': data.revision_id_2, } metadata_origin2 = metadata_origin.copy() metadata_origin2['id'] = data.origin_url_2 # when storage.revision_intrinsic_metadata_add([metadata_rev]) storage.origin_intrinsic_metadata_add([ metadata_origin, metadata_origin2]) storage.origin_intrinsic_metadata_delete([ { 'id': data.origin_url_1, 'indexer_configuration_id': tool_id } ]) # then actual_metadata = list(storage.origin_intrinsic_metadata_get( [data.origin_url_1, data.origin_url_2, 'no://where'])) for item in actual_metadata: item['indexer_configuration_id'] = item.pop('tool')['id'] assert actual_metadata == [metadata_origin2] def test_origin_intrinsic_metadata_delete_nonexisting( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data tool_id = data.tools['swh-metadata-detector']['id'] storage.origin_intrinsic_metadata_delete([ { 'id': data.origin_url_1, 'indexer_configuration_id': tool_id } ]) def test_origin_intrinsic_metadata_add_drop_duplicate( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] metadata_v1 = { 'version': None, 'name': None, } metadata_rev_v1 = { 'id': data.revision_id_1, 'metadata': metadata_v1.copy(), 'mappings': [], 'indexer_configuration_id': tool_id, } metadata_origin_v1 = { 'id': 
data.origin_url_1, 'metadata': metadata_v1.copy(), 'indexer_configuration_id': tool_id, 'mappings': [], 'from_revision': data.revision_id_1, } # given storage.revision_intrinsic_metadata_add([metadata_rev_v1]) storage.origin_intrinsic_metadata_add([metadata_origin_v1]) # when actual_metadata = list(storage.origin_intrinsic_metadata_get( [data.origin_url_1, 'no://where'])) expected_metadata_v1 = [{ 'id': data.origin_url_1, 'metadata': metadata_v1, 'tool': data.tools['swh-metadata-detector'], 'from_revision': data.revision_id_1, 'mappings': [], }] assert actual_metadata == expected_metadata_v1 # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'name': 'test_metadata', 'author': 'MG', }) metadata_rev_v2 = metadata_rev_v1.copy() metadata_origin_v2 = metadata_origin_v1.copy() metadata_rev_v2['metadata'] = metadata_v2 metadata_origin_v2['metadata'] = metadata_v2 storage.revision_intrinsic_metadata_add([metadata_rev_v2]) storage.origin_intrinsic_metadata_add([metadata_origin_v2]) # then actual_metadata = list(storage.origin_intrinsic_metadata_get( [data.origin_url_1])) # metadata did not change as the v2 was dropped. assert actual_metadata == expected_metadata_v1 def test_origin_intrinsic_metadata_add_update_in_place_duplicate( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] metadata_v1 = { 'version': None, 'name': None, } metadata_rev_v1 = { 'id': data.revision_id_2, 'metadata': metadata_v1, 'mappings': [], 'indexer_configuration_id': tool_id, } metadata_origin_v1 = { 'id': data.origin_url_1, 'metadata': metadata_v1.copy(), 'indexer_configuration_id': tool_id, 'mappings': [], 'from_revision': data.revision_id_2, } # given storage.revision_intrinsic_metadata_add([metadata_rev_v1]) storage.origin_intrinsic_metadata_add([metadata_origin_v1]) # when actual_metadata = list(storage.origin_intrinsic_metadata_get( [data.origin_url_1])) # then expected_metadata_v1 = [{ 'id': data.origin_url_1, 'metadata': metadata_v1, 'tool': data.tools['swh-metadata-detector'], 'from_revision': data.revision_id_2, 'mappings': [], }] assert actual_metadata == expected_metadata_v1 # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'name': 'test_update_duplicated_metadata', 'author': 'MG', }) metadata_rev_v2 = metadata_rev_v1.copy() metadata_origin_v2 = metadata_origin_v1.copy() metadata_rev_v2['metadata'] = metadata_v2 metadata_origin_v2 = { 'id': data.origin_url_1, 'metadata': metadata_v2.copy(), 'indexer_configuration_id': tool_id, 'mappings': ['npm'], 'from_revision': data.revision_id_1, } storage.revision_intrinsic_metadata_add( [metadata_rev_v2], conflict_update=True) storage.origin_intrinsic_metadata_add( [metadata_origin_v2], conflict_update=True) actual_metadata = list(storage.origin_intrinsic_metadata_get( [data.origin_url_1])) expected_metadata_v2 = [{ 'id': data.origin_url_1, 'metadata': metadata_v2, 'tool': data.tools['swh-metadata-detector'], 'from_revision': data.revision_id_1, 'mappings': ['npm'], }] # metadata did change as the v2 was used to overwrite v1 assert actual_metadata == expected_metadata_v2 def test_origin_intrinsic_metadata_add__update_in_place_deadlock( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] ids = list(range(10)) example_data1 = { 'metadata': { 'version': None, 'name': None, }, 'mappings': [], } example_data2 = { 'metadata': { 'version': 'v1.1.1', 'name': 'foo', }, 
'mappings': [], } metadata_rev_v1 = { 'id': data.revision_id_2, 'metadata': { 'version': None, 'name': None, }, 'mappings': [], 'indexer_configuration_id': tool_id, } data_v1 = [ { 'id': 'file:///tmp/origin%d' % id_, 'from_revision': data.revision_id_2, **example_data1, 'indexer_configuration_id': tool_id, } for id_ in ids ] data_v2 = [ { 'id': 'file:///tmp/origin%d' % id_, 'from_revision': data.revision_id_2, **example_data2, 'indexer_configuration_id': tool_id, } for id_ in ids ] # Remove one item from each, so that both queries have to succeed for # all items to be in the DB. data_v2a = data_v2[1:] data_v2b = list(reversed(data_v2[0:-1])) # given storage.revision_intrinsic_metadata_add([metadata_rev_v1]) storage.origin_intrinsic_metadata_add(data_v1) # when origins = ['file:///tmp/origin%d' % i for i in ids] actual_data = list(storage.origin_intrinsic_metadata_get(origins)) expected_data_v1 = [ { 'id': 'file:///tmp/origin%d' % id_, 'from_revision': data.revision_id_2, **example_data1, 'tool': data.tools['swh-metadata-detector'], } for id_ in ids ] # then assert actual_data == expected_data_v1 # given def f1(): storage.origin_intrinsic_metadata_add( data_v2a, conflict_update=True) def f2(): storage.origin_intrinsic_metadata_add( data_v2b, conflict_update=True) t1 = threading.Thread(target=f1) t2 = threading.Thread(target=f2) t2.start() t1.start() t1.join() t2.join() actual_data = list(storage.origin_intrinsic_metadata_get(origins)) expected_data_v2 = [ { 'id': 'file:///tmp/origin%d' % id_, 'from_revision': data.revision_id_2, **example_data2, 'tool': data.tools['swh-metadata-detector'], } for id_ in ids ] assert len(actual_data) == len(expected_data_v2) assert sorted(actual_data, key=lambda x: x['id']) == expected_data_v2 def test_origin_intrinsic_metadata_add__duplicate_twice( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] metadata = { 'developmentStatus': None, 'name': None, } metadata_rev = { 'id': data.revision_id_2, 'metadata': metadata, 'mappings': ['mapping1'], 'indexer_configuration_id': tool_id, } metadata_origin = { 'id': data.origin_url_1, 'metadata': metadata, 'indexer_configuration_id': tool_id, 'mappings': ['mapping1'], 'from_revision': data.revision_id_2, } # when storage.revision_intrinsic_metadata_add([metadata_rev]) with pytest.raises(ValueError): storage.origin_intrinsic_metadata_add([ metadata_origin, metadata_origin]) def test_origin_intrinsic_metadata_search_fulltext( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] metadata1 = { 'author': 'John Doe', } metadata1_rev = { 'id': data.revision_id_1, 'metadata': metadata1, 'mappings': [], 'indexer_configuration_id': tool_id, } metadata1_origin = { 'id': data.origin_url_1, 'metadata': metadata1, 'mappings': [], 'indexer_configuration_id': tool_id, 'from_revision': data.revision_id_1, } metadata2 = { 'author': 'Jane Doe', } metadata2_rev = { 'id': data.revision_id_2, 'metadata': metadata2, 'mappings': [], 'indexer_configuration_id': tool_id, } metadata2_origin = { 'id': data.origin_url_2, 'metadata': metadata2, 'mappings': [], 'indexer_configuration_id': tool_id, 'from_revision': data.revision_id_2, } # when storage.revision_intrinsic_metadata_add([metadata1_rev]) storage.origin_intrinsic_metadata_add([metadata1_origin]) storage.revision_intrinsic_metadata_add([metadata2_rev]) storage.origin_intrinsic_metadata_add([metadata2_origin]) 
# then search = storage.origin_intrinsic_metadata_search_fulltext assert set([res['id'] for res in search(['Doe'])]) \ == set([data.origin_url_1, data.origin_url_2]) assert [res['id'] for res in search(['John', 'Doe'])] \ == [data.origin_url_1] assert [res['id'] for res in search(['John'])] \ == [data.origin_url_1] assert not list(search(['John', 'Jane'])) def test_origin_intrinsic_metadata_search_fulltext_rank( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data # given tool_id = data.tools['swh-metadata-detector']['id'] # The following authors have "Random Person" to add some more content # to the JSON data, to work around normalization quirks when there # are few words (rank/(1+ln(nb_words)) is very sensitive to nb_words # for small values of nb_words). metadata1 = { 'author': [ 'Random Person', 'John Doe', 'Jane Doe', ] } metadata1_rev = { 'id': data.revision_id_1, 'metadata': metadata1, 'mappings': [], 'indexer_configuration_id': tool_id, } metadata1_origin = { 'id': data.origin_url_1, 'metadata': metadata1, 'mappings': [], 'indexer_configuration_id': tool_id, 'from_revision': data.revision_id_1, } metadata2 = { 'author': [ 'Random Person', 'Jane Doe', ] } metadata2_rev = { 'id': data.revision_id_2, 'metadata': metadata2, 'mappings': [], 'indexer_configuration_id': tool_id, } metadata2_origin = { 'id': data.origin_url_2, 'metadata': metadata2, 'mappings': [], 'indexer_configuration_id': tool_id, 'from_revision': data.revision_id_2, } # when storage.revision_intrinsic_metadata_add([metadata1_rev]) storage.origin_intrinsic_metadata_add([metadata1_origin]) storage.revision_intrinsic_metadata_add([metadata2_rev]) storage.origin_intrinsic_metadata_add([metadata2_origin]) # then search = storage.origin_intrinsic_metadata_search_fulltext assert [res['id'] for res in search(['Doe'])] \ == [data.origin_url_1, data.origin_url_2] assert [res['id'] for res in search(['Doe'], limit=1)] \ == [data.origin_url_1] assert [res['id'] for res in search(['John'])] \ == [data.origin_url_1] assert [res['id'] for res in search(['Jane'])] \ == [data.origin_url_2, data.origin_url_1] assert [res['id'] for res in search(['John', 'Jane'])] \ == [data.origin_url_1] def _fill_origin_intrinsic_metadata( self, swh_indexer_storage_with_data): storage, data = swh_indexer_storage_with_data tool1_id = data.tools['swh-metadata-detector']['id'] tool2_id = data.tools['swh-metadata-detector2']['id'] metadata1 = { '@context': 'foo', 'author': 'John Doe', } metadata1_rev = { 'id': data.revision_id_1, 'metadata': metadata1, 'mappings': ['npm'], 'indexer_configuration_id': tool1_id, } metadata1_origin = { 'id': data.origin_url_1, 'metadata': metadata1, 'mappings': ['npm'], 'indexer_configuration_id': tool1_id, 'from_revision': data.revision_id_1, } metadata2 = { '@context': 'foo', 'author': 'Jane Doe', } metadata2_rev = { 'id': data.revision_id_2, 'metadata': metadata2, 'mappings': ['npm', 'gemspec'], 'indexer_configuration_id': tool2_id, } metadata2_origin = { 'id': data.origin_url_2, 'metadata': metadata2, 'mappings': ['npm', 'gemspec'], 'indexer_configuration_id': tool2_id, 'from_revision': data.revision_id_2, } metadata3 = { '@context': 'foo', } metadata3_rev = { 'id': data.revision_id_3, 'metadata': metadata3, 'mappings': ['npm', 'gemspec'], 'indexer_configuration_id': tool2_id, } metadata3_origin = { 'id': data.origin_url_3, 'metadata': metadata3, 'mappings': ['pkg-info'], 'indexer_configuration_id': tool2_id, 'from_revision': data.revision_id_3, } 
        storage.revision_intrinsic_metadata_add([metadata1_rev])
        storage.origin_intrinsic_metadata_add([metadata1_origin])
        storage.revision_intrinsic_metadata_add([metadata2_rev])
        storage.origin_intrinsic_metadata_add([metadata2_origin])
        storage.revision_intrinsic_metadata_add([metadata3_rev])
        storage.origin_intrinsic_metadata_add([metadata3_origin])

    def test_origin_intrinsic_metadata_search_by_producer(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        self._fill_origin_intrinsic_metadata(
            swh_indexer_storage_with_data)
        tool1 = data.tools['swh-metadata-detector']
        tool2 = data.tools['swh-metadata-detector2']
        endpoint = storage.origin_intrinsic_metadata_search_by_producer

        # test pagination
        # no 'page_token' param, return all origins
        result = endpoint(ids_only=True)
        assert result['origins'] \
            == [data.origin_url_1, data.origin_url_2, data.origin_url_3]
        assert 'next_page_token' not in result

        # 'page_token' is < origin_1, return everything
        result = endpoint(page_token=data.origin_url_1[:-1], ids_only=True)
        assert result['origins'] \
            == [data.origin_url_1, data.origin_url_2, data.origin_url_3]
        assert 'next_page_token' not in result

        # 'page_token' is origin_3, return nothing
        result = endpoint(page_token=data.origin_url_3, ids_only=True)
        assert not result['origins']
        assert 'next_page_token' not in result

        # test limit argument
        result = endpoint(page_token=data.origin_url_1[:-1],
                          limit=2, ids_only=True)
        assert result['origins'] == [data.origin_url_1, data.origin_url_2]
        assert result['next_page_token'] == result['origins'][-1]

        result = endpoint(page_token=data.origin_url_1,
                          limit=2, ids_only=True)
        assert result['origins'] == [data.origin_url_2, data.origin_url_3]
        assert 'next_page_token' not in result

        result = endpoint(page_token=data.origin_url_2,
                          limit=2, ids_only=True)
        assert result['origins'] == [data.origin_url_3]
        assert 'next_page_token' not in result

        # test mappings filtering
        result = endpoint(mappings=['npm'], ids_only=True)
        assert result['origins'] == [data.origin_url_1, data.origin_url_2]
        assert 'next_page_token' not in result

        result = endpoint(mappings=['npm', 'gemspec'], ids_only=True)
        assert result['origins'] == [data.origin_url_1, data.origin_url_2]
        assert 'next_page_token' not in result

        result = endpoint(mappings=['gemspec'], ids_only=True)
        assert result['origins'] == [data.origin_url_2]
        assert 'next_page_token' not in result

        result = endpoint(mappings=['pkg-info'], ids_only=True)
        assert result['origins'] == [data.origin_url_3]
        assert 'next_page_token' not in result

        result = endpoint(mappings=['foobar'], ids_only=True)
        assert not result['origins']
        assert 'next_page_token' not in result

        # test pagination + mappings
        result = endpoint(mappings=['npm'], limit=1, ids_only=True)
        assert result['origins'] == [data.origin_url_1]
        assert result['next_page_token'] == result['origins'][-1]

        # test tool filtering
        result = endpoint(tool_ids=[tool1['id']], ids_only=True)
        assert result['origins'] == [data.origin_url_1]
        assert 'next_page_token' not in result

        result = endpoint(tool_ids=[tool2['id']], ids_only=True)
        assert sorted(result['origins']) \
            == [data.origin_url_2, data.origin_url_3]
        assert 'next_page_token' not in result

        result = endpoint(tool_ids=[tool1['id'], tool2['id']], ids_only=True)
        assert sorted(result['origins']) \
            == [data.origin_url_1, data.origin_url_2, data.origin_url_3]
        assert 'next_page_token' not in result

        # test ids_only=False
        assert endpoint(mappings=['gemspec'])['origins'] \
            == [{
                'id': data.origin_url_2,
                'metadata': {
                    '@context': 'foo',
                    'author': 'Jane Doe',
                },
                'mappings': ['npm', 'gemspec'],
                'tool': tool2,
                'from_revision': data.revision_id_2,
            }]

    def test_origin_intrinsic_metadata_stats(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        self._fill_origin_intrinsic_metadata(
            swh_indexer_storage_with_data)

        result = storage.origin_intrinsic_metadata_stats()

        assert result == {
            'per_mapping': {
                'gemspec': 1,
                'npm': 2,
                'pkg-info': 1,
                'codemeta': 0,
                'maven': 0,
            },
            'total': 3,
            'non_empty': 2,
        }


class TestIndexerStorageIndexerConfiguration:
    def test_indexer_configuration_add(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        tool = {
            'tool_name': 'some-unknown-tool',
            'tool_version': 'some-version',
            'tool_configuration': {"debian-package": "some-package"},
        }
        actual_tool = storage.indexer_configuration_get(tool)
        assert actual_tool is None  # does not exist

        # add it
        actual_tools = list(storage.indexer_configuration_add([tool]))

        assert len(actual_tools) == 1
        actual_tool = actual_tools[0]
        assert actual_tool is not None  # now it exists
        new_id = actual_tool.pop('id')
        assert actual_tool == tool

        actual_tools2 = list(storage.indexer_configuration_add([tool]))
        actual_tool2 = actual_tools2[0]
        assert actual_tool2 is not None  # now it exists
        new_id2 = actual_tool2.pop('id')

        assert new_id == new_id2
        assert actual_tool == actual_tool2

    def test_indexer_configuration_add_multiple(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        tool = {
            'tool_name': 'some-unknown-tool',
            'tool_version': 'some-version',
            'tool_configuration': {"debian-package": "some-package"},
        }
        actual_tools = list(storage.indexer_configuration_add([tool]))
        assert len(actual_tools) == 1

        new_tools = [tool, {
            'tool_name': 'yet-another-tool',
            'tool_version': 'version',
            'tool_configuration': {},
        }]

        actual_tools = list(storage.indexer_configuration_add(new_tools))
        assert len(actual_tools) == 2

        # order not guaranteed, so we iterate over results to check
        for tool in actual_tools:
            _id = tool.pop('id')
            assert _id is not None
            assert tool in new_tools

    def test_indexer_configuration_get_missing(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        tool = {
            'tool_name': 'unknown-tool',
            'tool_version': '3.1.0rc2-31-ga2cbb8c',
            'tool_configuration': {"command_line": "nomossa "},
        }
        actual_tool = storage.indexer_configuration_get(tool)
        assert actual_tool is None

    def test_indexer_configuration_get(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        tool = {
            'tool_name': 'nomos',
            'tool_version': '3.1.0rc2-31-ga2cbb8c',
            'tool_configuration': {"command_line": "nomossa "},
        }
        actual_tool = storage.indexer_configuration_get(tool)
        assert actual_tool

        expected_tool = tool.copy()
        del actual_tool['id']

        assert expected_tool == actual_tool

    def test_indexer_configuration_metadata_get_missing_context(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        tool = {
            'tool_name': 'swh-metadata-translator',
            'tool_version': '0.0.1',
            'tool_configuration': {"context": "unknown-context"},
        }
        actual_tool = storage.indexer_configuration_get(tool)
        assert actual_tool is None

    def test_indexer_configuration_metadata_get(
            self, swh_indexer_storage_with_data):
        storage, data = swh_indexer_storage_with_data
        tool = {
            'tool_name': 'swh-metadata-translator',
            'tool_version': '0.0.1',
            'tool_configuration': {"type": "local", "context": "NpmMapping"},
        }
        storage.indexer_configuration_add([tool])
        actual_tool = storage.indexer_configuration_get(tool)
        assert actual_tool

        expected_tool = tool.copy()
        expected_tool['id'] = actual_tool['id']

        assert expected_tool == actual_tool


class TestIndexerStorageMisc:
    """Misc endpoints tests for the IndexerStorage.

    """

    def test_check_config(self, swh_indexer_storage):
        storage = swh_indexer_storage
        assert storage.check_config(check_write=True)
        assert storage.check_config(check_write=False)
+
+    def test_types(self, swh_indexer_storage):
+        """Checks all methods of IndexerStorageInterface are implemented by
+        this backend, and that they have the same signature."""
+        # Create an instance of the protocol (which cannot be instantiated
+        # directly, so this creates a subclass, then instantiates it)
+        interface = type('_', (IndexerStorageInterface,), {})()
+
+        assert 'content_mimetype_add' in dir(interface)
+
+        missing_methods = []
+
+        for meth_name in dir(interface):
+            if meth_name.startswith('_'):
+                continue
+            interface_meth = getattr(interface, meth_name)
+            try:
+                concrete_meth = getattr(swh_indexer_storage, meth_name)
+            except AttributeError:
+                missing_methods.append(meth_name)
+                continue
+
+            expected_signature = inspect.signature(interface_meth)
+            actual_signature = inspect.signature(concrete_meth)
+
+            assert expected_signature == actual_signature, meth_name
+
+        assert missing_methods == []