diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
index c166dfd..e2f52ea 100644
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -1,365 +1,417 @@
 # Copyright (C) 2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from collections import defaultdict
 import json
 
 SHA1_DIGEST_SIZE = 160
 
 
 def _transform_tool(tool):
     return {
         'id': tool['id'],
         'name': tool['tool_name'],
         'version': tool['tool_version'],
         'configuration': tool['tool_configuration'],
     }
 
 
 class SubStorage:
     """Implements common missing/get/add logic for each indexer type."""
     def __init__(self, tools):
         self._tools = tools
         self._data = {}  # map (id_, tool_id) -> metadata_dict
         self._tools_per_id = defaultdict(set)  # map id_ -> Set[tool_id]
 
     def missing(self, ids):
         """List data missing from storage.
 
         Args:
             data (iterable): dictionaries with keys:
 
                 - **id** (bytes): sha1 identifier
                 - **indexer_configuration_id** (int): tool used to compute
                   the results
 
         Yields:
             missing sha1s
 
         """
         for id_ in ids:
             tool_id = id_['indexer_configuration_id']
             id_ = id_['id']
             if tool_id not in self._tools_per_id.get(id_, set()):
                 yield id_
 
     def get(self, ids):
         """Retrieve data per id.
 
         Args:
             ids (iterable): sha1 checksums
 
         Yields:
             dict: dictionaries with the following keys:
 
               - **id** (bytes)
               - **tool** (dict): tool used to compute metadata
               - arbitrary data (as provided to `add`)
 
         """
         for id_ in ids:
             for tool_id in self._tools_per_id.get(id_, set()):
                 key = (id_, tool_id)
                 yield {
                     'id': id_,
                     'tool': _transform_tool(self._tools[tool_id]),
                     **self._data[key],
                 }
 
     def add(self, data, conflict_update):
         """Add data not present in storage.
 
         Args:
             data (iterable): dictionaries with keys:
 
               - **id**: sha1
               - **indexer_configuration_id**: tool used to compute the
                 results
               - arbitrary data
 
             conflict_update (bool): Flag to determine if we want to overwrite
               (true) or skip duplicates (false)
 
         """
         for item in data:
             item = item.copy()
             tool_id = item.pop('indexer_configuration_id')
             id_ = item.pop('id')
             data = item
             if not conflict_update and \
                     tool_id in self._tools_per_id.get(id_, set()):
                 # Duplicate, should not be updated
                 continue
             key = (id_, tool_id)
             self._data[key] = data
             self._tools_per_id[id_].add(tool_id)
 
 
 class IndexerStorage:
     """In-memory SWH indexer storage."""
 
     def __init__(self):
         self._tools = {}
+        self._mimetypes = SubStorage(self._tools)
         self._content_ctags = SubStorage(self._tools)
         self._content_metadata = SubStorage(self._tools)
         self._revision_metadata = SubStorage(self._tools)
 
+    def content_mimetype_missing(self, mimetypes):
+        """Generate mimetypes missing from storage.
+
+        Args:
+            mimetypes (iterable): iterable of dict with keys:
+
+              - **id** (bytes): sha1 identifier
+              - **indexer_configuration_id** (int): tool used to compute the
+                results
+
+        Yields:
+            missing sha1s, i.e. the id of each input pair absent from storage
+
+        """
+        yield from self._mimetypes.missing(mimetypes)
+
+    def content_mimetype_add(self, mimetypes, conflict_update=False):
+        """Add mimetypes not present in storage.
+
+        Args:
+            mimetypes (iterable): dictionaries with keys:
+
+              - **id** (bytes): sha1 identifier
+              - **mimetype** (bytes): raw content's mimetype
+              - **encoding** (bytes): raw content's encoding
+              - **indexer_configuration_id** (int): tool's id used to
+                compute the results
+
+            conflict_update (bool): Flag to determine if we want to overwrite
+              (``True``) or skip duplicates (``False``, the default)
+
+        """
+        self._mimetypes.add(mimetypes, conflict_update)
+
+    def content_mimetype_get(self, ids, db=None, cur=None):
+        """Retrieve full content mimetype per ids.
+
+        Args:
+            ids (iterable): sha1 identifier
+
+        Yields:
+            mimetypes (iterable): dictionaries with keys:
+
+                - **id** (bytes): sha1 identifier
+                - **mimetype** (bytes): raw content's mimetype
+                - **encoding** (bytes): raw content's encoding
+                - **tool** (dict): Tool used to compute the mimetype
+
+        """
+        yield from self._mimetypes.get(ids)
+
     def content_ctags_missing(self, ctags):
         """List ctags missing from storage.
 
         Args:
             ctags (iterable): dicts with keys:
 
                 - **id** (bytes): sha1 identifier
                 - **indexer_configuration_id** (int): tool used to compute
                   the results
 
         Yields:
             an iterable of missing id for the tuple (id,
             indexer_configuration_id)
 
         """
         yield from self._content_ctags.missing(ctags)
 
     def content_ctags_get(self, ids):
         """Retrieve ctags per id.
 
         Args:
             ids (iterable): sha1 checksums
 
         Yields:
             Dictionaries with keys:
 
                 - **id** (bytes): content's identifier
                 - **name** (str): symbol's name
                 - **kind** (str): symbol's kind
                 - **lang** (str): language for that content
                 - **tool** (dict): tool used to compute the ctags' info
 
 
         """
         for item in self._content_ctags.get(ids):
             for item_ctags_item in item['ctags']:
                 yield {
                     'id': item['id'],
                     'tool': item['tool'],
                     **item_ctags_item
                 }
 
     def content_ctags_add(self, ctags, conflict_update=False):
         """Add ctags not present in storage
 
         Args:
             ctags (iterable): dictionaries with keys:
 
               - **id** (bytes): sha1
               - **ctags** ([list): List of dictionary with keys: name, kind,
                   line, lang
               - **indexer_configuration_id**: tool used to compute the
                 results
 
         """
         for item in ctags:
             tool_id = item['indexer_configuration_id']
             if conflict_update:
                 item_ctags = []
             else:
                 # merge old ctags with new ctags
                 existing = list(self._content_ctags.get([item['id']]))
                 item_ctags = [
                     {
                         key: ctags_item[key]
                         for key in ('name', 'kind', 'line', 'lang')
                     }
                     for existing_item in existing
                     if existing_item['tool']['id'] == tool_id
                     for ctags_item in existing_item['ctags']
                 ]
             for new_item_ctags in item['ctags']:
                 if new_item_ctags not in item_ctags:
                     item_ctags.append(new_item_ctags)
             self._content_ctags.add([
                 {
                     'id': item['id'],
                     'indexer_configuration_id': tool_id,
                     'ctags': item_ctags,
                 }
             ], conflict_update=True)
 
     def content_ctags_search(self, expression,
                              limit=10, last_sha1=None, db=None, cur=None):
         """Search through content's raw ctags symbols.
 
         Args:
             expression (str): Expression to search for
             limit (int): Number of rows to return (default to 10).
             last_sha1 (str): Offset from which retrieving data (default to '').
 
         Yields:
             rows of ctags including id, name, lang, kind, line, etc...
 
         """
         nb_matches = 0
         for ((id_, tool_id), item) in \
                 sorted(self._content_ctags._data.items()):
             if id_ <= (last_sha1 or bytes(0 for _ in range(SHA1_DIGEST_SIZE))):
                 continue
             nb_matches += 1
             for ctags_item in item['ctags']:
                 if ctags_item['name'] != expression:
                     continue
                 yield {
                     'id': id_,
                     'tool': _transform_tool(self._tools[tool_id]),
                     **ctags_item
                 }
             if nb_matches >= limit:
                 return
 
     def content_metadata_missing(self, metadata):
         """List metadata missing from storage.
 
         Args:
             metadata (iterable): dictionaries with keys:
 
               - **id** (bytes): sha1 identifier
               - **indexer_configuration_id** (int): tool used to compute
                 the results
 
         Yields:
             missing sha1s
 
         """
         yield from self._content_metadata.missing(metadata)
 
     def content_metadata_get(self, ids):
         """Retrieve metadata per id.
 
         Args:
             ids (iterable): sha1 checksums
 
         Yields:
             dictionaries with the following keys:
 
               - **id** (bytes)
               - **translated_metadata** (str): associated metadata
               - **tool** (dict): tool used to compute metadata
 
         """
         yield from self._content_metadata.get(ids)
 
     def content_metadata_add(self, metadata, conflict_update=False):
         """Add metadata not present in storage.
 
         Args:
             metadata (iterable): dictionaries with keys:
 
               - **id**: sha1
               - **translated_metadata**: arbitrary dict
               - **indexer_configuration_id**: tool used to compute the
                 results
 
             conflict_update: Flag to determine if we want to overwrite (true)
                 or skip duplicates (false, the default)
 
         """
         self._content_metadata.add(metadata, conflict_update)
 
     def revision_metadata_missing(self, metadata):
         """List metadata missing from storage.
 
         Args:
             metadata (iterable): dictionaries with keys:
 
               - **id** (bytes): sha1_git revision identifier
               - **indexer_configuration_id** (int): tool used to compute
                 the results
 
         Yields:
             missing ids
 
         """
         yield from self._revision_metadata.missing(metadata)
 
     def revision_metadata_get(self, ids):
         """Retrieve revision metadata per id.
 
         Args:
             ids (iterable): sha1 checksums
 
         Yields:
             dictionaries with the following keys:
 
             - **id** (bytes)
             - **translated_metadata** (str): associated metadata
             - **tool** (dict): tool used to compute metadata
 
         """
         yield from self._revision_metadata.get(ids)
 
     def revision_metadata_add(self, metadata, conflict_update=False):
         """Add metadata not present in storage.
 
         Args:
             metadata (iterable): dictionaries with keys:
 
               - **id**: sha1_git of revision
               - **translated_metadata**: arbitrary dict
               - **indexer_configuration_id**: tool used to compute metadata
 
             conflict_update: Flag to determine if we want to overwrite (true)
               or skip duplicates (false, the default)
 
         """
         self._revision_metadata.add(metadata, conflict_update)
 
     def indexer_configuration_add(self, tools):
         """Add new tools to the storage.
 
         Args:
             tools ([dict]): List of dictionary representing tool to
               insert in the db. Dictionary with the following keys:
 
               - **tool_name** (str): tool's name
               - **tool_version** (str): tool's version
               - **tool_configuration** (dict): tool's configuration
                 (free form dict)
 
         Returns:
             list: List of dict inserted in the db (holding the id key as
             well). The order of the list is not guaranteed to match
             the order of the initial list.
 
         """
         inserted = []
         for tool in tools:
             tool = tool.copy()
             id_ = self._tool_key(tool)
             tool['id'] = id_
             self._tools[id_] = tool
             inserted.append(tool)
         return inserted
 
     def indexer_configuration_get(self, tool):
         """Retrieve tool information.
 
         Args:
             tool (dict): Dictionary representing a tool with the
               following keys:
 
               - **tool_name** (str): tool's name
               - **tool_version** (str): tool's version
               - **tool_configuration** (dict): tool's configuration
                 (free form dict)
 
         Returns:
             The same dictionary with an `id` key, None otherwise.
 
         """
         return self._tools.get(self._tool_key(tool))
 
     def _tool_key(self, tool):
         return (tool['tool_name'], tool['tool_version'],
                 json.dumps(tool['tool_configuration'], sort_keys=True))
diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py
index ff18b6e..d5e69a5 100644
--- a/swh/indexer/tests/storage/test_in_memory.py
+++ b/swh/indexer/tests/storage/test_in_memory.py
@@ -1,119 +1,103 @@
 from unittest import TestCase
 import pytest
 
 from .test_storage import CommonTestStorage
 
 
 class IndexerTestInMemoryStorage(CommonTestStorage, TestCase):
     def setUp(self):
         self.storage_config = {
             'cls': 'memory',
             'args': {
             },
         }
         super().setUp()
 
     @pytest.mark.xfail
     def test_check_config(self):
         pass
 
-    @pytest.mark.xfail
-    def test_content_mimetype_missing(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_mimetype_add__drop_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_mimetype_add__update_in_place_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_mimetype_get(self):
-        pass
-
     @pytest.mark.xfail
     def test_content_language_missing(self):
         pass
 
     @pytest.mark.xfail
     def test_content_language_get(self):
         pass
 
     @pytest.mark.xfail
     def test_content_language_add__drop_duplicate(self):
         pass
 
     @pytest.mark.xfail
     def test_content_language_add__update_in_place_duplicate(self):
         pass
 
     @pytest.mark.xfail
     def test_content_fossology_license_get(self):
         pass
 
     @pytest.mark.xfail
     def test_content_fossology_license_add__new_license_added(self):
         pass
 
     @pytest.mark.xfail
     def test_content_fossology_license_add__update_in_place_duplicate(self):
         pass
 
     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_get(self):
         pass
 
     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_add_drop_duplicate(self):
         pass
 
     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self):
         pass
 
     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_search_fulltext(self):
         pass
 
     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_search_fulltext_rank(self):
         pass
 
     @pytest.mark.xfail
     def test_indexer_configuration_metadata_get_missing_context(self):
         pass
 
     @pytest.mark.xfail
     def test_indexer_configuration_metadata_get(self):
         pass
 
     @pytest.mark.xfail
     def test_generate_content_mimetype_get_range_limit_none(self):
         pass
 
     @pytest.mark.xfail
     def test_generate_content_mimetype_get_range_no_limit(self, mimetypes):
         pass
 
     @pytest.mark.xfail
     def test_generate_content_mimetype_get_range_limit(self, mimetypes):
         pass
 
     @pytest.mark.xfail
     def test_generate_content_fossology_license_get_range_limit_none(self):
         pass
 
     @pytest.mark.xfail
     def test_generate_content_fossology_license_get_range_no_limit(self):
         pass
 
     @pytest.mark.xfail
     def test_generate_content_fossology_license_get_range_no_limit_with_filter(
             self):
         pass
 
     @pytest.mark.xfail
     def test_generate_fossology_license_get_range_limit(self):
         pass
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
index e5223bc..e472149 100644
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -1,192 +1,190 @@
 # Copyright (C) 2017-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
-import logging
 
 from unittest.mock import patch
 
 from swh.indexer.mimetype import (
     MimetypeIndexer, MimetypeRangeIndexer, compute_mimetype_encoding
 )
 
 from swh.indexer.tests.test_utils import (
     MockObjStorage, BasicMockStorage, BasicMockIndexerStorage,
     CommonContentIndexerTest, CommonContentIndexerRangeTest,
     CommonIndexerWithErrorsTest, CommonIndexerNoTool,
     BASE_TEST_CONFIG
 )
 
 
 class FakeMagicResult:
     def __init__(self, mimetype, encoding):
         self.mime_type = mimetype
         self.encoding = encoding
 
 
 class BasicTest(unittest.TestCase):
     @patch('swh.indexer.mimetype.magic')
     def test_compute_mimetype_encoding(self, mock_magic):
         """Compute mimetype encoding should return results"""
         for _input, _mimetype, _encoding in [
                 (b'some-content', 'text/plain', 'utf-8'),
                 (b'raw-content', 'application/json', 'ascii')]:
             mock_magic.detect_from_content.return_value = FakeMagicResult(
                 _mimetype, _encoding)
 
             actual_result = compute_mimetype_encoding(_input)
             self.assertEqual(actual_result, {
                 'mimetype': _mimetype,
                 'encoding': _encoding
             })
 
 
 class MimetypeTestIndexer(MimetypeIndexer):
     """Specific mimetype indexer instance whose configuration is enough to
        satisfy the indexing tests.
 
     """
     def parse_config_file(self, *args, **kwargs):
         return {
             **BASE_TEST_CONFIG,
             'tools': {
                 'name': 'file',
                 'version': '1:5.30-1+deb9u1',
                 'configuration': {
                     "type": "library",
                     "debian-package": "python3-magic"
                 },
             },
         }
 
-    def prepare(self):
-        super().prepare()
-        self.idx_storage = BasicMockIndexerStorage()
-        self.log = logging.getLogger('swh.indexer')
-        self.objstorage = MockObjStorage()
-
 
 class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Mimetype indexer test scenarios:
 
     - Known sha1s in the input list have their data indexed
     - Unknown sha1 in the input list are not indexed
 
     """
+
+    def get_indexer_results(self, ids):
+        yield from self.idx_storage.content_mimetype_get(ids)
+
     def setUp(self):
         self.indexer = MimetypeTestIndexer()
+        self.idx_storage = self.indexer.idx_storage
 
         self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
         self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
         self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
         tool_id = self.indexer.tool['id']
         self.expected_results = {
             self.id0: {
                 'id': self.id0,
                 'indexer_configuration_id': tool_id,
                 'mimetype': 'text/plain',
                 'encoding': 'us-ascii',
             },
             self.id1: {
                 'id': self.id1,
                 'indexer_configuration_id': tool_id,
                 'mimetype': 'text/plain',
                 'encoding': 'us-ascii',
             },
             self.id2: {
                 'id': self.id2,
                 'indexer_configuration_id': tool_id,
                 'mimetype': 'application/x-empty',
                 'encoding': 'binary',
             }
         }
 
 
 class MimetypeRangeIndexerTest(MimetypeRangeIndexer):
     """Specific mimetype whose configuration is enough to satisfy the
        indexing tests.
 
     """
     def parse_config_file(self, *args, **kwargs):
         return {
             **BASE_TEST_CONFIG,
             'tools': {
                 'name': 'file',
                 'version': '1:5.30-1+deb9u1',
                 'configuration': {
                     "type": "library",
                     "debian-package": "python3-magic"
                 },
             },
             'write_batch_size': 100,
         }
 
     def prepare(self):
         super().prepare()
         self.idx_storage = BasicMockIndexerStorage()
         # this hardcodes some contents, will use this to setup the storage
         self.objstorage = MockObjStorage()
         # sync objstorage and storage
         contents = [{'sha1': c_id} for c_id in self.objstorage]
         self.storage = BasicMockStorage(contents)
 
 
 class TestMimetypeRangeIndexer(
         CommonContentIndexerRangeTest, unittest.TestCase):
     """Range Mimetype Indexer tests.
 
     - new data within range are indexed
     - no data outside a range are indexed
     - with filtering existing indexed data prior to compute new index
     - without filtering existing indexed data prior to compute new index
 
     """
     def setUp(self):
         self.indexer = MimetypeRangeIndexerTest()
         # will play along with the objstorage's mocked contents for now
         self.contents = sorted(self.indexer.objstorage)
         # FIXME: leverage swh.objstorage.in_memory_storage's
         # InMemoryObjStorage, swh.storage.tests's gen_contents, and
         # hypothesis to generate data to actually run indexer on those
 
         self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
         self.id1 = '02fb2c89e14f7fab46701478c83779c7beb7b069'
         self.id2 = '103bc087db1d26afc3a0283f38663d081e9b01e6'
         tool_id = self.indexer.tool['id']
 
         self.expected_results = {
             self.id0: {
                 'encoding': 'us-ascii',
                 'id': self.id0,
                 'indexer_configuration_id': tool_id,
                 'mimetype': 'text/plain'},
             self.id1: {
                 'encoding': 'us-ascii',
                 'id': self.id1,
                 'indexer_configuration_id': tool_id,
                 'mimetype': 'text/x-python'},
             self.id2: {
                 'encoding': 'us-ascii',
                 'id': self.id2,
                 'indexer_configuration_id': tool_id,
                 'mimetype': 'text/plain'}
         }
 
 
 class MimetypeIndexerUnknownToolTestStorage(
         CommonIndexerNoTool, MimetypeTestIndexer):
     """Mimetype indexer with wrong configuration"""
 
 
 class MimetypeRangeIndexerUnknownToolTestStorage(
         CommonIndexerNoTool, MimetypeRangeIndexerTest):
     """Mimetype range indexer with wrong configuration"""
 
 
 class TestMimetypeIndexersErrors(
         CommonIndexerWithErrorsTest, unittest.TestCase):
     """Test the indexer raise the right errors when wrongly initialized"""
     Indexer = MimetypeIndexerUnknownToolTestStorage
     RangeIndexer = MimetypeRangeIndexerUnknownToolTestStorage