diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
index 4372969..1d3c1b3 100644
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -1,560 +1,610 @@
 # Copyright (C) 2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import bisect
 from collections import defaultdict
 import json

 SHA1_DIGEST_SIZE = 160


 def _transform_tool(tool):
     return {
         'id': tool['id'],
         'name': tool['tool_name'],
         'version': tool['tool_version'],
         'configuration': tool['tool_configuration'],
     }


 class SubStorage:
     """Implements common missing/get/add logic for each indexer type."""
     def __init__(self, tools):
         self._tools = tools
         self._sorted_ids = []
         self._data = {}  # map (id_, tool_id) -> metadata_dict
         self._tools_per_id = defaultdict(set)  # map id_ -> Set[tool_id]

     def missing(self, ids):
         """List data missing from storage.

         Args:
             data (iterable): dictionaries with keys:

                 - **id** (bytes): sha1 identifier
                 - **indexer_configuration_id** (int): tool used to compute
                   the results

         Yields:
             missing sha1s

         """
         for id_ in ids:
             tool_id = id_['indexer_configuration_id']
             id_ = id_['id']
             if tool_id not in self._tools_per_id.get(id_, set()):
                 yield id_

     def get(self, ids):
         """Retrieve data per id.

         Args:
             ids (iterable): sha1 checksums

         Yields:
             dict: dictionaries with the following keys:

                 - **id** (bytes)
                 - **tool** (dict): tool used to compute metadata
                 - arbitrary data (as provided to `add`)

         """
         for id_ in ids:
             for tool_id in self._tools_per_id.get(id_, set()):
                 key = (id_, tool_id)
                 yield {
                     'id': id_,
                     'tool': _transform_tool(self._tools[tool_id]),
                     **self._data[key],
                 }

     def get_range(self, start, end, indexer_configuration_id, limit):
         """Retrieve data within range [start, end] bound by limit.

         Args:
             **start** (bytes): Starting identifier range (expected smaller
               than end)
             **end** (bytes): Ending identifier range (expected larger
               than start)
             **indexer_configuration_id** (int): The tool used to index data
             **limit** (int): Limit result

         Raises:
             ValueError for limit to None

         Returns:
             a dict with keys:

             - **ids** [bytes]: iterable of content ids within the range.
             - **next** (Optional[bytes]): The next range of sha1 starts at
               this sha1 if any

         """
         if limit is None:
             raise ValueError('Development error: limit should not be None')
         from_index = bisect.bisect_left(self._sorted_ids, start)
         to_index = bisect.bisect_right(self._sorted_ids, end, lo=from_index)
         if to_index - from_index >= limit:
             return {
                 'ids': self._sorted_ids[from_index:from_index+limit],
                 'next': self._sorted_ids[from_index+limit],
             }
         else:
             return {
                 'ids': self._sorted_ids[from_index:to_index],
                 'next': None,
             }

     def add(self, data, conflict_update):
         """Add data not present in storage.

         Args:
             data (iterable): dictionaries with keys:

                 - **id**: sha1
                 - **indexer_configuration_id**: tool used to compute the
                   results
                 - arbitrary data

             conflict_update (bool): Flag to determine if we want to overwrite
                 (true) or skip duplicates (false)

         """
         for item in data:
             item = item.copy()
             tool_id = item.pop('indexer_configuration_id')
             id_ = item.pop('id')
             data = item
             if not conflict_update and \
                     tool_id in self._tools_per_id.get(id_, set()):
                 # Duplicate, should not be updated
                 continue
             key = (id_, tool_id)
             self._data[key] = data
             self._tools_per_id[id_].add(tool_id)
             if id_ not in self._sorted_ids:
                 bisect.insort(self._sorted_ids, id_)

     def add_merge(self, new_data, conflict_update, merged_key):
         for new_item in new_data:
             id_ = new_item['id']
             tool_id = new_item['indexer_configuration_id']
             if conflict_update:
                 all_subitems = []
             else:
                 existing = list(self.get([id_]))
                 all_subitems = [
                     old_subitem
                     for existing_item in existing
                     if existing_item['tool']['id'] == tool_id
                     for old_subitem in existing_item[merged_key]
                 ]
             for new_subitem in new_item[merged_key]:
                 if new_subitem not in all_subitems:
                     all_subitems.append(new_subitem)
             self.add([
                 {
                     'id': id_,
                     'indexer_configuration_id': tool_id,
                     merged_key: all_subitems,
                 }
             ], conflict_update=True)
             if id_ not in self._sorted_ids:
                 bisect.insort(self._sorted_ids, id_)


 class IndexerStorage:
     """In-memory SWH indexer storage."""

     def __init__(self):
         self._tools = {}
         self._mimetypes = SubStorage(self._tools)
+        self._languages = SubStorage(self._tools)
         self._content_ctags = SubStorage(self._tools)
         self._licenses = SubStorage(self._tools)
         self._content_metadata = SubStorage(self._tools)
         self._revision_metadata = SubStorage(self._tools)

     def content_mimetype_missing(self, mimetypes):
         """Generate mimetypes missing from storage.

         Args:
             mimetypes (iterable): iterable of dict with keys:

                 - **id** (bytes): sha1 identifier
                 - **indexer_configuration_id** (int): tool used to compute
                   the results

         Yields:
             tuple (id, indexer_configuration_id): missing id

         """
         yield from self._mimetypes.missing(mimetypes)

     def content_mimetype_get_range(
             self, start, end, indexer_configuration_id, limit=1000):
         """Retrieve mimetypes within range [start, end] bound by limit.

         Args:
             **start** (bytes): Starting identifier range (expected smaller
               than end)
             **end** (bytes): Ending identifier range (expected larger
               than start)
             **indexer_configuration_id** (int): The tool used to index data
             **limit** (int): Limit result (default to 1000)

         Raises:
             ValueError for limit to None

         Returns:
             a dict with keys:

             - **ids** [bytes]: iterable of content ids within the range.
             - **next** (Optional[bytes]): The next range of sha1 starts at
               this sha1 if any

         """
         return self._mimetypes.get_range(
             start, end, indexer_configuration_id, limit)

     def content_mimetype_add(self, mimetypes, conflict_update=False):
         """Add mimetypes not present in storage.

         Args:
             mimetypes (iterable): dictionaries with keys:

                 - **id** (bytes): sha1 identifier
                 - **mimetype** (bytes): raw content's mimetype
                 - **encoding** (bytes): raw content's encoding
                 - **indexer_configuration_id** (int): tool's id used to
                   compute the results
                 - **conflict_update** (bool): Flag to determine if we want to
                   overwrite (``True``) or skip duplicates (``False``, the
                   default)

         """
         if not all(isinstance(x['id'], bytes) for x in mimetypes):
             raise TypeError('identifiers must be bytes.')
         self._mimetypes.add(mimetypes, conflict_update)

     def content_mimetype_get(self, ids, db=None, cur=None):
         """Retrieve full content mimetype per ids.

         Args:
             ids (iterable): sha1 identifier

         Yields:
             mimetypes (iterable): dictionaries with keys:

                 - **id** (bytes): sha1 identifier
                 - **mimetype** (bytes): raw content's mimetype
                 - **encoding** (bytes): raw content's encoding
                 - **tool** (dict): Tool used to compute the language

         """
         yield from self._mimetypes.get(ids)

+    def content_language_missing(self, languages):
+        """List languages missing from storage.
+
+        Args:
+            languages (iterable): dictionaries with keys:
+
+                - **id** (bytes): sha1 identifier
+                - **indexer_configuration_id** (int): tool used to compute
+                  the results
+
+        Yields:
+            an iterable of missing id for the tuple (id,
+            indexer_configuration_id)
+
+        """
+        yield from self._languages.missing(languages)
+
+    def content_language_get(self, ids):
+        """Retrieve full content language per ids.
+
+        Args:
+            ids (iterable): sha1 identifier
+
+        Yields:
+            languages (iterable): dictionaries with keys:
+
+                - **id** (bytes): sha1 identifier
+                - **lang** (bytes): raw content's language
+                - **tool** (dict): Tool used to compute the language
+
+        """
+        yield from self._languages.get(ids)
+
+    def content_language_add(self, languages, conflict_update=False):
+        """Add languages not present in storage.
+
+        Args:
+            languages (iterable): dictionaries with keys:
+
+                - **id** (bytes): sha1
+                - **lang** (bytes): language detected
+
+            conflict_update (bool): Flag to determine if we want to
+                overwrite (true) or skip duplicates (false, the
+                default)
+
+        """
+        self._languages.add(languages, conflict_update)
+
     def content_ctags_missing(self, ctags):
         """List ctags missing from storage.

         Args:
             ctags (iterable): dicts with keys:

                 - **id** (bytes): sha1 identifier
                 - **indexer_configuration_id** (int): tool used to compute
                   the results

         Yields:
             an iterable of missing id for the tuple (id,
             indexer_configuration_id)

         """
         yield from self._content_ctags.missing(ctags)

     def content_ctags_get(self, ids):
         """Retrieve ctags per id.

         Args:
             ids (iterable): sha1 checksums

         Yields:
             Dictionaries with keys:

                 - **id** (bytes): content's identifier
                 - **name** (str): symbol's name
                 - **kind** (str): symbol's kind
                 - **lang** (str): language for that content
                 - **tool** (dict): tool used to compute the ctags' info

         """
         for item in self._content_ctags.get(ids):
             for item_ctags_item in item['ctags']:
                 yield {
                     'id': item['id'],
                     'tool': item['tool'],
                     **item_ctags_item
                 }

     def content_ctags_add(self, ctags, conflict_update=False):
         """Add ctags not present in storage

         Args:
             ctags (iterable): dictionaries with keys:

                 - **id** (bytes): sha1
                 - **ctags** ([list): List of dictionary with keys: name,
                   kind, line, lang
                 - **indexer_configuration_id**: tool used to compute the
                   results

         """
         if not all(isinstance(x['id'], bytes) for x in ctags):
             raise TypeError('identifiers must be bytes.')
         self._content_ctags.add_merge(ctags, conflict_update, 'ctags')

     def content_ctags_search(self, expression,
                              limit=10, last_sha1=None, db=None, cur=None):
         """Search through content's raw ctags symbols.

         Args:
             expression (str): Expression to search for
             limit (int): Number of rows to return (default to 10).
             last_sha1 (str): Offset from which retrieving data (default to '').

         Yields:
             rows of ctags including id, name, lang, kind, line, etc...

         """
         nb_matches = 0
         for ((id_, tool_id), item) in \
                 sorted(self._content_ctags._data.items()):
             if id_ <= (last_sha1 or bytes(0 for _ in range(SHA1_DIGEST_SIZE))):
                 continue
             nb_matches += 1
             for ctags_item in item['ctags']:
                 if ctags_item['name'] != expression:
                     continue
                 yield {
                     'id': id_,
                     'tool': _transform_tool(self._tools[tool_id]),
                     **ctags_item
                 }
             if nb_matches >= limit:
                 return

     def content_fossology_license_get(self, ids):
         """Retrieve licenses per id.

         Args:
             ids (iterable): sha1 checksums

         Yields:
             `{id: facts}` where `facts` is a dict with the following keys:

                 - **licenses** ([str]): associated licenses for that content
                 - **tool** (dict): Tool used to compute the license

         """
         # TODO: remove this reformatting in order to yield items with the
         # same format as other _get methods.
         res = {}
         for d in self._licenses.get(ids):
             res.setdefault(d.pop('id'), []).append(d)
         for (id_, facts) in res.items():
             yield {id_: facts}

     def content_fossology_license_add(self, licenses, conflict_update=False):
         """Add licenses not present in storage.

         Args:
             licenses (iterable): dictionaries with keys:

                 - **id**: sha1
                 - **licenses** ([bytes]): List of licenses associated to sha1
                 - **tool** (str): nomossa

             conflict_update: Flag to determine if we want to overwrite (true)
                 or skip duplicates (false, the default)

         Returns:
             list: content_license entries which failed due to unknown licenses

         """
         if not all(isinstance(x['id'], bytes) for x in licenses):
             raise TypeError('identifiers must be bytes.')
         self._licenses.add_merge(licenses, conflict_update, 'licenses')

     def content_fossology_license_get_range(
             self, start, end, indexer_configuration_id, limit=1000):
         """Retrieve licenses within range [start, end] bound by limit.

         Args:
             **start** (bytes): Starting identifier range (expected smaller
               than end)
             **end** (bytes): Ending identifier range (expected larger
               than start)
             **indexer_configuration_id** (int): The tool used to index data
             **limit** (int): Limit result (default to 1000)

         Raises:
             ValueError for limit to None

         Returns:
             a dict with keys:

             - **ids** [bytes]: iterable of content ids within the range.
             - **next** (Optional[bytes]): The next range of sha1 starts at
               this sha1 if any

         """
         return self._licenses.get_range(
             start, end, indexer_configuration_id, limit)

     def content_metadata_missing(self, metadata):
         """List metadata missing from storage.

         Args:
             metadata (iterable): dictionaries with keys:

                 - **id** (bytes): sha1 identifier
                 - **indexer_configuration_id** (int): tool used to compute
                   the results

         Yields:
             missing sha1s

         """
         yield from self._content_metadata.missing(metadata)

     def content_metadata_get(self, ids):
         """Retrieve metadata per id.

         Args:
             ids (iterable): sha1 checksums

         Yields:
             dictionaries with the following keys:

                 - **id** (bytes)
                 - **translated_metadata** (str): associated metadata
                 - **tool** (dict): tool used to compute metadata

         """
         yield from self._content_metadata.get(ids)

     def content_metadata_add(self, metadata, conflict_update=False):
         """Add metadata not present in storage.

         Args:
             metadata (iterable): dictionaries with keys:

                 - **id**: sha1
                 - **translated_metadata**: arbitrary dict
                 - **indexer_configuration_id**: tool used to compute the
                   results

             conflict_update: Flag to determine if we want to overwrite (true)
                 or skip duplicates (false, the default)

         """
         if not all(isinstance(x['id'], bytes) for x in metadata):
             raise TypeError('identifiers must be bytes.')
         self._content_metadata.add(metadata, conflict_update)

     def revision_metadata_missing(self, metadata):
         """List metadata missing from storage.

         Args:
             metadata (iterable): dictionaries with keys:

                 - **id** (bytes): sha1_git revision identifier
                 - **indexer_configuration_id** (int): tool used to compute
                   the results

         Yields:
             missing ids

         """
         yield from self._revision_metadata.missing(metadata)

     def revision_metadata_get(self, ids):
         """Retrieve revision metadata per id.

         Args:
             ids (iterable): sha1 checksums

         Yields:
             dictionaries with the following keys:

                 - **id** (bytes)
                 - **translated_metadata** (str): associated metadata
                 - **tool** (dict): tool used to compute metadata

         """
         yield from self._revision_metadata.get(ids)

     def revision_metadata_add(self, metadata, conflict_update=False):
         """Add metadata not present in storage.

         Args:
             metadata (iterable): dictionaries with keys:

                 - **id**: sha1_git of revision
                 - **translated_metadata**: arbitrary dict
                 - **indexer_configuration_id**: tool used to compute metadata

             conflict_update: Flag to determine if we want to overwrite (true)
                 or skip duplicates (false, the default)

         """
         if not all(isinstance(x['id'], bytes) for x in metadata):
             raise TypeError('identifiers must be bytes.')
         self._revision_metadata.add(metadata, conflict_update)

     def indexer_configuration_add(self, tools):
         """Add new tools to the storage.

         Args:
             tools ([dict]): List of dictionary representing tool to
                 insert in the db. Dictionary with the following keys:

                 - **tool_name** (str): tool's name
                 - **tool_version** (str): tool's version
                 - **tool_configuration** (dict): tool's configuration
                   (free form dict)

         Returns:
             list: List of dict inserted in the db (holding the id key as
             well). The order of the list is not guaranteed to match the order
             of the initial list.

         """
         inserted = []
         for tool in tools:
             tool = tool.copy()
             id_ = self._tool_key(tool)
             tool['id'] = id_
             self._tools[id_] = tool
             inserted.append(tool)
         return inserted

     def indexer_configuration_get(self, tool):
         """Retrieve tool information.

         Args:
             tool (dict): Dictionary representing a tool with the following
                 keys:

                 - **tool_name** (str): tool's name
                 - **tool_version** (str): tool's version
                 - **tool_configuration** (dict): tool's configuration
                   (free form dict)

         Returns:
             The same dictionary with an `id` key, None otherwise.

         """
         return self._tools.get(self._tool_key(tool))

     def _tool_key(self, tool):
         return (tool['tool_name'], tool['tool_version'],
                 json.dumps(tool['tool_configuration'], sort_keys=True))
diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py
index fef8c25..a709886 100644
--- a/swh/indexer/tests/storage/test_in_memory.py
+++ b/swh/indexer/tests/storage/test_in_memory.py
@@ -1,65 +1,49 @@
 from unittest import TestCase

 import pytest

 from .test_storage import CommonTestStorage


 class IndexerTestInMemoryStorage(CommonTestStorage, TestCase):
     def setUp(self):
         self.storage_config = {
             'cls': 'memory',
             'args': {
             },
         }
         super().setUp()

     def reset_storage_tables(self):
         self.storage = self.storage.__class__()

     @pytest.mark.xfail
     def test_check_config(self):
         pass

-    @pytest.mark.xfail
-    def test_content_language_missing(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_language_get(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_language_add__drop_duplicate(self):
-        pass
-
-    @pytest.mark.xfail
-    def test_content_language_add__update_in_place_duplicate(self):
-        pass
-
     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_get(self):
         pass

     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_add_drop_duplicate(self):
         pass

     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self):
         pass

     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_search_fulltext(self):
         pass

     @pytest.mark.xfail
     def test_origin_intrinsic_metadata_search_fulltext_rank(self):
         pass

     @pytest.mark.xfail
     def test_indexer_configuration_metadata_get_missing_context(self):
         pass

     @pytest.mark.xfail
     def test_indexer_configuration_metadata_get(self):
         pass
diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
index c53524e..3f7ab1d 100644
--- a/swh/indexer/tests/test_language.py
+++ b/swh/indexer/tests/test_language.py
@@ -1,98 +1,98 @@
 # Copyright (C) 2017-2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import unittest

 from swh.indexer import language
 from swh.indexer.language import LanguageIndexer
 from swh.indexer.tests.test_utils import (
-    BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest,
-    CommonIndexerWithErrorsTest, CommonIndexerNoTool, BASE_TEST_CONFIG
+    CommonContentIndexerTest, CommonIndexerWithErrorsTest,
+    CommonIndexerNoTool, BASE_TEST_CONFIG, fill_storage, fill_obj_storage
 )


 class LanguageTestIndexer(LanguageIndexer):
     """Specific language whose configuration is enough to satisfy the
     indexing tests.
     """
     def parse_config_file(self, *args, **kwargs):
         return {
             **BASE_TEST_CONFIG,
             'tools': {
                 'name': 'pygments',
                 'version': '2.0.1+dfsg-1.1+deb8u1',
                 'configuration': {
                     'type': 'library',
                     'debian-package': 'python3-pygments',
                     'max_content_size': 10240,
                 },
             }
         }

-    def prepare(self):
-        super().prepare()
-        self.idx_storage = BasicMockIndexerStorage()
-        self.objstorage = MockObjStorage()
-        self.tool_config = self.config['tools']['configuration']
-

 class Language(unittest.TestCase):
     """Tests pygments tool for language detection

     """
     def test_compute_language_none(self):
         # given
         self.content = ""
         self.declared_language = {
             'lang': None
         }
         # when
         result = language.compute_language(self.content)
         # then
         self.assertEqual(self.declared_language, result)


 class TestLanguageIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Language indexer test scenarios:

     - Known sha1s in the input list have their data indexed
     - Unknown sha1 in the input list are not indexed
     """
+
+    def get_indexer_results(self, ids):
+        yield from self.indexer.idx_storage.content_language_get(ids)
+
     def setUp(self):
         self.indexer = LanguageTestIndexer()
+        fill_storage(self.indexer.storage)
+        fill_obj_storage(self.indexer.objstorage)

         self.id0 = '02fb2c89e14f7fab46701478c83779c7beb7b069'
         self.id1 = '103bc087db1d26afc3a0283f38663d081e9b01e6'
         self.id2 = 'd4c647f0fc257591cc9ba1722484229780d1c607'

         tool_id = self.indexer.tool['id']

         self.expected_results = {
             self.id0: {
                 'id': self.id0,
                 'indexer_configuration_id': tool_id,
                 'lang': 'python',
             },
             self.id1: {
                 'id': self.id1,
                 'indexer_configuration_id': tool_id,
                 'lang': 'c'
             },
             self.id2: {
                 'id': self.id2,
                 'indexer_configuration_id': tool_id,
                 'lang': 'text-only'
             }
         }


 class LanguageIndexerUnknownToolTestStorage(
         CommonIndexerNoTool, LanguageTestIndexer):
     """Fossology license indexer with wrong configuration"""


 class TestLanguageIndexersErrors(
         CommonIndexerWithErrorsTest, unittest.TestCase):
     """Test the indexer raise the right errors when wrongly initialized"""
     Indexer = LanguageIndexerUnknownToolTestStorage