diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -346,7 +346,7 @@
         - **id** (bytes): content's identifier
         - **name** (str): symbol's name
         - **kind** (str): symbol's kind
-        - **language** (str): language for that content
+        - **lang** (str): language for that content
         - **tool** (dict): tool used to compute the ctags' info
@@ -365,7 +365,7 @@
         - **id** (bytes): sha1
         - **ctags** ([list): List of dictionary with keys: name, kind,
-          line, language
+          line, lang
 
     """
     def _convert_ctags(__ctags):
@@ -412,9 +412,8 @@
         ids (iterable): sha1 checksums
 
     Yields:
-        list: dictionaries with the following keys:
+        `{id: facts}` where `facts` is a list of dicts with the following keys:
 
-        - **id** (bytes)
         - **licenses** ([str]): associated licenses for that content
         - **tool** (dict): Tool used to compute the license
@@ -439,7 +438,7 @@
     licenses (iterable): dictionaries with keys:
 
         - **id**: sha1
-        - **license** ([bytes]): List of licenses associated to sha1
+        - **licenses** ([bytes]): List of licenses associated to sha1
         - **tool** (str): nomossa
 
     conflict_update: Flag to determine if we want to overwrite (true)
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -3,31 +3,35 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import bisect
 from collections import defaultdict
 import json
 
+SHA1_DIGEST_SIZE = 160  # size of a sha1 digest, in bits
 
-class MetadataStorage:
-    """Implements missing/get/add logic for both content_metadata and
-    revision_metadata."""
+
+def _transform_tool(tool):
+    return {
+        'id': tool['id'],
+        'name': tool['tool_name'],
+        'version': tool['tool_version'],
+        'configuration': tool['tool_configuration'],
+    }
+
+
+class SubStorage:
+    """Implements common missing/get/add logic for each indexer type."""
     def __init__(self, tools):
         self._tools = tools
-        self._metadata = {}  # map (id_, tool_id) -> metadata_dict
+        self._sorted_ids = []
+        self._data = {}  # map (id_, tool_id) -> data_dict
         self._tools_per_id = defaultdict(set)  # map id_ -> Set[tool_id]
 
-    def _transform_tool(self, tool):
-        return {
-            'id': tool['id'],
-            'name': tool['tool_name'],
-            'version': tool['tool_version'],
-            'configuration': tool['tool_configuration'],
-        }
-
     def missing(self, ids):
-        """List metadata missing from storage.
+        """List data missing from storage.
 
         Args:
-            metadata (iterable): dictionaries with keys:
+            ids (iterable): dictionaries with keys:
 
                 - **id** (bytes): sha1 identifier
                 - **indexer_configuration_id** (int): tool used to compute
@@ -44,7 +48,7 @@
             yield id_
 
     def get(self, ids):
-        """Retrieve metadata per id.
+        """Retrieve data per id.
 
         Args:
             ids (iterable): sha1 checksums
@@ -53,8 +57,8 @@
             dict: dictionaries with the following keys:
 
                 - **id** (bytes)
-                - **translated_metadata** (str): associated metadata
                 - **tool** (dict): tool used to compute metadata
+                - arbitrary data (as provided to `add`)
 
         """
         for id_ in ids:
@@ -62,36 +66,102 @@
                 key = (id_, tool_id)
                 yield {
                     'id': id_,
-                    'tool': self._transform_tool(self._tools[tool_id]),
-                    'translated_metadata': self._metadata[key],
+                    'tool': _transform_tool(self._tools[tool_id]),
+                    **self._data[key],
                 }
 
-    def add(self, metadata, conflict_update):
-        """Add metadata not present in storage.
+    def get_range(self, start, end, indexer_configuration_id, limit):
+        """Retrieve data within range [start, end] bound by limit.
 
         Args:
-            metadata (iterable): dictionaries with keys:
+            **start** (bytes): Starting identifier range (expected smaller
+                than end)
+            **end** (bytes): Ending identifier range (expected larger
+                than start)
+            **indexer_configuration_id** (int): The tool used to index data
+            **limit** (int): Limit result
+
+        Raises:
+            ValueError: if limit is None
+
+        Returns:
+            a dict with keys:
+
+            - **ids** [bytes]: iterable of content ids within the range.
+            - **next** (Optional[bytes]): The next range of sha1 starts at
+              this sha1 if any
+
+        """
+        if limit is None:
+            raise ValueError('Development error: limit should not be None')
+        from_index = bisect.bisect_left(self._sorted_ids, start)
+        to_index = bisect.bisect_right(self._sorted_ids, end, lo=from_index)
+        if to_index - from_index > limit:
+            # more ids than the limit remain in the range: truncate, and
+            # report the first id left out as the start of the next page
+            return {
+                'ids': self._sorted_ids[from_index:from_index+limit],
+                'next': self._sorted_ids[from_index+limit],
+            }
+        else:
+            return {
+                'ids': self._sorted_ids[from_index:to_index],
+                'next': None,
+            }
+
+    def add(self, data, conflict_update):
+        """Add data not present in storage.
+
+        Args:
+            data (iterable): dictionaries with keys:
 
                 - **id**: sha1
-                - **translated_metadata**: arbitrary dict
                 - **indexer_configuration_id**: tool used to compute the
                   results
+                - arbitrary data
 
             conflict_update (bool): Flag to determine if we want to
                 overwrite (true) or skip duplicates (false)
 
         """
-        for item in metadata:
-            tool_id = item['indexer_configuration_id']
-            data = item['translated_metadata']
-            id_ = item['id']
+        for item in data:
+            item = item.copy()
+            tool_id = item.pop('indexer_configuration_id')
+            id_ = item.pop('id')
             if not conflict_update and \
                tool_id in self._tools_per_id.get(id_, set()):
                 # Duplicate, should not be updated
                 continue
             key = (id_, tool_id)
-            self._metadata[key] = data
+            self._data[key] = item
             self._tools_per_id[id_].add(tool_id)
+            if id_ not in self._sorted_ids:
+                bisect.insort(self._sorted_ids, id_)
+
+    def add_merge(self, new_data, conflict_update, merged_key):
+        """Add new data, merging the list stored under ``merged_key``
+        with the existing one: with conflict_update=False, new subitems
+        are appended to it (skipping duplicates); with
+        conflict_update=True, the existing subitems are discarded and
+        replaced."""
+        for new_item in new_data:
+            id_ = new_item['id']
+            tool_id = new_item['indexer_configuration_id']
+            if conflict_update:
+                all_subitems = []
+            else:
+                existing = list(self.get([id_]))
+                all_subitems = [
+                    old_subitem
+                    for existing_item in existing
+                    if existing_item['tool']['id'] == tool_id
+                    for old_subitem in existing_item[merged_key]
+                ]
+            for new_subitem in new_item[merged_key]:
+                if new_subitem not in all_subitems:
+                    all_subitems.append(new_subitem)
+            self.add([
+                {
+                    'id': id_,
+                    'indexer_configuration_id': tool_id,
+                    merged_key: all_subitems,
+                }
+            ], conflict_update=True)
 
 
 class IndexerStorage:
@@ -99,8 +169,214 @@
     def __init__(self):
         self._tools = {}
-        self._content_metadata = MetadataStorage(self._tools)
-        self._revision_metadata = MetadataStorage(self._tools)
+        self._mimetypes = SubStorage(self._tools)
+        self._content_ctags = SubStorage(self._tools)
+        self._licenses = SubStorage(self._tools)
+        self._content_metadata = SubStorage(self._tools)
+        self._revision_metadata = SubStorage(self._tools)
+
+    def content_mimetype_missing(self, mimetypes):
+        """Generate mimetypes missing from storage.
+
+        Args:
+            mimetypes (iterable): iterable of dict with keys:
+
+                - **id** (bytes): sha1 identifier
+                - **indexer_configuration_id** (int): tool used to compute
+                  the results
+
+        Yields:
+            the ids, among the given (id, indexer_configuration_id) pairs,
+            that are missing from storage
+
+        """
+        yield from self._mimetypes.missing(mimetypes)
+
+    def content_mimetype_add(self, mimetypes, conflict_update=False):
+        """Add mimetypes not present in storage.
+
+        Args:
+            mimetypes (iterable): dictionaries with keys:
+
+                - **id** (bytes): sha1 identifier
+                - **mimetype** (bytes): raw content's mimetype
+                - **encoding** (bytes): raw content's encoding
+                - **indexer_configuration_id** (int): tool's id used to
+                  compute the results
+
+            conflict_update (bool): Flag to determine if we want to
+                overwrite (``True``) or skip duplicates (``False``, the
+                default)
+
+        """
+        self._mimetypes.add(mimetypes, conflict_update)
+
+    def content_mimetype_get(self, ids, db=None, cur=None):
+        """Retrieve full content mimetype per ids.
+
+        Args:
+            ids (iterable): sha1 identifier
+
+        Yields:
+            mimetypes (iterable): dictionaries with keys:
+
+                - **id** (bytes): sha1 identifier
+                - **mimetype** (bytes): raw content's mimetype
+                - **encoding** (bytes): raw content's encoding
+                - **tool** (dict): Tool used to compute the mimetype
+
+        """
+        yield from self._mimetypes.get(ids)
+
+    def content_ctags_missing(self, ctags):
+        """List ctags missing from storage.
+
+        Args:
+            ctags (iterable): dicts with keys:
+
+                - **id** (bytes): sha1 identifier
+                - **indexer_configuration_id** (int): tool used to compute
+                  the results
+
+        Yields:
+            the ids, among the given (id, indexer_configuration_id) pairs,
+            that are missing from storage
+
+        """
+        yield from self._content_ctags.missing(ctags)
+
+    def content_ctags_get(self, ids):
+        """Retrieve ctags per id.
+
+        Args:
+            ids (iterable): sha1 checksums
+
+        Yields:
+            Dictionaries with keys:
+
+                - **id** (bytes): content's identifier
+                - **name** (str): symbol's name
+                - **kind** (str): symbol's kind
+                - **lang** (str): language for that content
+                - **tool** (dict): tool used to compute the ctags' info
+
+        """
+        for item in self._content_ctags.get(ids):
+            for item_ctags_item in item['ctags']:
+                yield {
+                    'id': item['id'],
+                    'tool': item['tool'],
+                    **item_ctags_item
+                }
+
+    def content_ctags_add(self, ctags, conflict_update=False):
+        """Add ctags not present in storage.
+
+        Args:
+            ctags (iterable): dictionaries with keys:
+
+                - **id** (bytes): sha1
+                - **ctags** (list): list of dictionaries with keys: name,
+                  kind, line, lang
+                - **indexer_configuration_id**: tool used to compute the
+                  results
+
+        """
+        self._content_ctags.add_merge(ctags, conflict_update, 'ctags')
+
+    def content_ctags_search(self, expression,
+                             limit=10, last_sha1=None, db=None, cur=None):
+        """Search through content's raw ctags symbols.
+
+        Args:
+            expression (str): Expression to search for
+            limit (int): Number of rows to return (default to 10).
+            last_sha1 (str): Offset from which to retrieve results
+                (defaults to None, meaning no offset).
+
+        Yields:
+            rows of ctags including id, name, lang, kind, line, etc.
+
+        """
+        nb_matches = 0
+        for ((id_, tool_id), item) in \
+                sorted(self._content_ctags._data.items()):
+            if last_sha1 is not None and id_ <= last_sha1:
+                continue
+            for ctags_item in item['ctags']:
+                if ctags_item['name'] != expression:
+                    continue
+                nb_matches += 1
+                yield {
+                    'id': id_,
+                    'tool': _transform_tool(self._tools[tool_id]),
+                    **ctags_item
+                }
+            # stop only at an id boundary, so that pagination on
+            # last_sha1 does not lose rows of a partially-emitted id
+            if nb_matches >= limit:
+                return
+
+    def content_fossology_license_get(self, ids):
+        """Retrieve licenses per id.
+
+        Args:
+            ids (iterable): sha1 checksums
+
+        Yields:
+            `{id: facts}` where `facts` is a list of dicts with the
+            following keys:
+
+                - **licenses** ([str]): associated licenses for that content
+                - **tool** (dict): Tool used to compute the license
+
+        """
+        # TODO: remove this reformatting in order to yield items with the
+        # same format as other _get methods.
+        res = {}
+        for d in self._licenses.get(ids):
+            res.setdefault(d.pop('id'), []).append(d)
+        for (id_, facts) in res.items():
+            yield {id_: facts}
+
+    def content_fossology_license_add(self, licenses, conflict_update=False):
+        """Add licenses not present in storage.
+
+        Args:
+            licenses (iterable): dictionaries with keys:
+
+                - **id**: sha1
+                - **licenses** ([bytes]): List of licenses associated to sha1
+                - **tool** (str): nomossa
+
+            conflict_update: Flag to determine if we want to overwrite (true)
+                or skip duplicates (false, the default)
+
+        Returns:
+            list: content_license entries which failed due to unknown licenses
+
+        """
+        self._licenses.add_merge(licenses, conflict_update, 'licenses')
+
+    def content_fossology_license_get_range(
+            self, start, end, indexer_configuration_id, limit=1000):
+        """Retrieve licenses within range [start, end] bound by limit.
+
+        Args:
+            **start** (bytes): Starting identifier range (expected smaller
+                than end)
+            **end** (bytes): Ending identifier range (expected larger
+                than start)
+            **indexer_configuration_id** (int): The tool used to index data
+            **limit** (int): Limit result (default to 1000)
+
+        Raises:
+            ValueError: if limit is None
+
+        Returns:
+            a dict with keys:
+
+            - **ids** [bytes]: iterable of content ids within the range.
+            - **next** (Optional[bytes]): The next range of sha1 starts at
+              this sha1 if any
+
+        """
+        return self._licenses.get_range(
+            start, end, indexer_configuration_id, limit)
 
     def content_metadata_missing(self, metadata):
         """List metadata missing from storage.
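
Note on the pagination contract implemented by `get_range` above: callers
resume by passing the returned `next` back as the new `start`. A minimal
sketch, assuming an in-memory `IndexerStorage` populated beforehand via
`content_fossology_license_add` (the tool id below is a made-up
placeholder, not part of the API):

    from swh.indexer.storage.in_memory import IndexerStorage

    storage = IndexerStorage()
    # ... storage.content_fossology_license_add(...) calls would go here ...
    TOOL_ID = 1  # hypothetical indexer_configuration_id

    start, end = b'\x00' * 20, b'\xff' * 20  # the whole sha1 space
    while start is not None:
        page = storage.content_fossology_license_get_range(
            start, end, indexer_configuration_id=TOOL_ID, limit=1000)
        for id_ in page['ids']:
            print(id_.hex())  # at most `limit` ids per page, in order
        start = page['next']  # first id of the next page, or None

Since `next` is the first id that was *not* returned, restarting with
`start = next` neither skips nor repeats ids.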
diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py
--- a/swh/indexer/tests/storage/test_in_memory.py
+++ b/swh/indexer/tests/storage/test_in_memory.py
@@ -13,24 +13,11 @@
         }
         super().setUp()
 
-    @pytest.mark.xfail
-    def test_check_config(self):
-        pass
+    def reset_storage_tables(self):
+        self.storage = self.storage.__class__()
 
     @pytest.mark.xfail
-    def test_content_mimetype_missing(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_mimetype_add__drop_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_mimetype_add__update_in_place_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_mimetype_get(self):
+    def test_check_config(self):
         pass
 
     @pytest.mark.xfail
@@ -50,42 +37,6 @@
         pass
 
     @pytest.mark.xfail
-    def test_content_ctags_missing(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_ctags_get(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_ctags_search(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_ctags_search_no_result(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_ctags_add__add_new_ctags_added(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_ctags_add__update_in_place(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_fossology_license_get(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_fossology_license_add__new_license_added(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_fossology_license_add__update_in_place_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
     def test_origin_intrinsic_metadata_get(self):
         pass
 
@@ -124,20 +75,3 @@
     @pytest.mark.xfail
     def test_generate_content_mimetype_get_range_limit(self, mimetypes):
         pass
-
-    @pytest.mark.xfail
-    def test_generate_content_fossology_license_get_range_limit_none(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_generate_content_fossology_license_get_range_no_limit(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_generate_content_fossology_license_get_range_no_limit_with_filter(
-            self):
-        pass
-
-    @pytest.mark.xfail
-    def test_generate_fossology_license_get_range_limit(self):
-        pass
diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py
--- a/swh/indexer/tests/test_ctags.py
+++ b/swh/indexer/tests/test_ctags.py
@@ -11,7 +11,7 @@
 )
 
 from swh.indexer.tests.test_utils import (
-    BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest,
+    CommonContentIndexerTest,
     CommonIndexerWithErrorsTest, CommonIndexerNoTool, SHA1_TO_CTAGS,
     NoDiskIndexer, BASE_TEST_CONFIG
 )
@@ -99,12 +99,6 @@
         'workdir': '/nowhere',
     }
 
-    def prepare(self):
-        super().prepare()
-        self.idx_storage = BasicMockIndexerStorage()
-        self.objstorage = MockObjStorage()
-        self.tool_config = self.config['tools']['configuration']
-
 
 class TestCtagsIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Ctags indexer test scenarios:
@@ -113,8 +107,13 @@
     - Unknown sha1 in the input list are not indexed
 
    """
+
+    def get_indexer_results(self, ids):
+        yield from self.idx_storage.content_ctags_get(ids)
+
     def setUp(self):
         self.indexer = CtagsIndexerTest()
+        self.idx_storage = self.indexer.idx_storage
 
         # Prepare test input
         self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
--- a/swh/indexer/tests/test_fossology_license.py
+++ b/swh/indexer/tests/test_fossology_license.py
@@ -78,12 +78,6 @@
         },
     }
 
-    def prepare(self):
-        super().prepare()
-        self.idx_storage = BasicMockIndexerStorage()
-        self.log = logging.getLogger('swh.indexer')
-        self.objstorage = MockObjStorage()
-
 
 class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Language indexer test scenarios:
@@ -92,8 +86,13 @@
     - Unknown sha1 in the input list are not indexed
 
     """
+
+    def get_indexer_results(self, ids):
+        yield from self.idx_storage.content_fossology_license_get(ids)
+
     def setUp(self):
         self.indexer = FossologyLicenseTestIndexer()
+        self.idx_storage = self.indexer.idx_storage
 
         self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
         self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -4,7 +4,6 @@
 # See top-level LICENSE file for more information
 
 import unittest
-import logging
 
 from unittest.mock import patch
 
@@ -61,12 +60,6 @@
         },
     }
 
-    def prepare(self):
-        super().prepare()
-        self.idx_storage = BasicMockIndexerStorage()
-        self.log = logging.getLogger('swh.indexer')
-        self.objstorage = MockObjStorage()
-
 
 class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Mimetype indexer test scenarios:
@@ -75,8 +68,13 @@
     - Unknown sha1 in the input list are not indexed
 
     """
+
+    def get_indexer_results(self, ids):
+        yield from self.idx_storage.content_mimetype_get(ids)
+
     def setUp(self):
         self.indexer = MimetypeTestIndexer()
+        self.idx_storage = self.indexer.idx_storage
 
         self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
         self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -659,7 +659,13 @@
 
 class CommonContentIndexerTest:
-    def assert_results_ok(self, actual_results, expected_results=None):
+    def get_indexer_results(self, ids):
+        """Return the results the indexer stored for `ids`. The default
+        reads a mock storage's recorded state; override this for indexers
+        that use a real (e.g. in-memory) storage."""
+        return self.indexer.idx_storage.state
+
+    def assert_results_ok(self, sha1s, expected_results=None):
+        actual_results = self.get_indexer_results(sha1s)
+
         if expected_results is None:
             expected_results = self.expected_results
 
@@ -678,15 +684,12 @@
         # when
         self.indexer.run(sha1s, policy_update='update-dups')
 
-        actual_results = self.indexer.idx_storage.state
-        self.assertTrue(self.indexer.idx_storage.conflict_update)
-        self.assert_results_ok(actual_results)
+        self.assert_results_ok(sha1s)
 
         # 2nd pass
         self.indexer.run(sha1s, policy_update='ignore-dups')
 
-        self.assertFalse(self.indexer.idx_storage.conflict_update)
-        self.assert_results_ok(actual_results)
+        self.assert_results_ok(sha1s)
 
     def test_index_one_unknown_sha1(self):
         """Unknown sha1 are not indexed"""
@@ -696,14 +699,13 @@
         # when
         self.indexer.run(sha1s, policy_update='update-dups')
 
-        actual_results = self.indexer.idx_storage.state
-
         # then
         expected_results = {
             k: v for k, v in self.expected_results.items()
             if k in sha1s
         }
 
-        self.assert_results_ok(actual_results, expected_results)
+        self.assert_results_ok(sha1s, expected_results)
 
 
 class CommonContentIndexerRangeTest:
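
Postscript on the merge semantics of `SubStorage.add_merge` (used by
`content_ctags_add` and `content_fossology_license_add`): with
`conflict_update=False` the list stored under `merged_key` accumulates,
skipping exact duplicates; with `conflict_update=True` the previous list is
discarded. A small sketch, assuming the module layout from this diff (the
tool dict and sha1 below are placeholder values):

    from swh.indexer.storage.in_memory import SubStorage

    # Hypothetical tool registry; in IndexerStorage this dict is shared
    # with the tool-registration code.
    tools = {7: {'id': 7, 'tool_name': 'nomos', 'tool_version': '3.1',
                 'tool_configuration': {}}}
    sub = SubStorage(tools)
    item = {'id': b'\x01' * 20, 'indexer_configuration_id': 7}

    sub.add_merge([{**item, 'licenses': ['GPL-2.0']}],
                  conflict_update=False, merged_key='licenses')
    # Merges with the existing entry, skipping the duplicate 'GPL-2.0':
    sub.add_merge([{**item, 'licenses': ['MIT', 'GPL-2.0']}],
                  conflict_update=False, merged_key='licenses')
    print(next(sub.get([b'\x01' * 20]))['licenses'])
    # -> ['GPL-2.0', 'MIT']; with conflict_update=True the second call
    # would have replaced the list with ['MIT', 'GPL-2.0'] instead.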