diff --git a/swh/indexer/__init__.py b/swh/indexer/__init__.py
index 014c42b..90e662a 100644
--- a/swh/indexer/__init__.py
+++ b/swh/indexer/__init__.py
@@ -1,22 +1,25 @@
 # Copyright (C) 2016  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from .file_properties import ContentMimetypeIndexer
+from .language import ContentLanguageIndexer
 
 
 INDEXER_CLASSES = {
     'mimetype': ContentMimetypeIndexer,
+    'language': ContentLanguageIndexer,
 }
 
 
 TASK_NAMES = {
     'orchestrator': 'swh.indexer.tasks.SWHOrchestratorTask',
     'mimetype': 'swh.indexer.tasks.SWHContentMimetypeTask',
+    'language': 'swh.indexer.tasks.SWHContentLanguageTask',
 }
 
 
 __all__ = [
     'INDEXER_CLASSES', 'TASK_NAMES'
 ]
diff --git a/swh/indexer/language.py b/swh/indexer/language.py
index 2a7855c..eba2cd5 100644
--- a/swh/indexer/language.py
+++ b/swh/indexer/language.py
@@ -1,48 +1,100 @@
 # Copyright (C) 2016  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 from pygments.lexers import guess_lexer
-from pygments.util import ClassNotFound
 from chardet import detect
 
+from .indexer import BaseIndexer
 
-def cleanup_classname(classname):
+
+def _cleanup_classname(classname):
     """Determine the language from the pygments' lexer names.
 
     """
     return classname.lower().replace(' ', '-')
 
 
-def run_language(raw_content):
+def compute_language(raw_content):
     """Determine the raw content's language.
 
     Args:
         raw_content (bytes): content to determine raw content
 
     Returns:
         Dict with keys:
         - lang: None if nothing found or the possible language
         - decoding_failure: True if a decoding failure happened
 
     """
     try:
-        encoding = detect(raw_content)['encoding']
+        stats = detect(raw_content)
+        encoding = stats['encoding']
         content = raw_content.decode(encoding)
-        lang = cleanup_classname(
+        lang = _cleanup_classname(
             guess_lexer(content).name)
-
         return {
             'lang': lang
         }
-    except ClassNotFound as e:
-        return {
-            'lang': None
-        }
-    except LookupError as e:  # Unknown encoding
+    except Exception:
         return {
-            'decoding_failure': True,
             'lang': None
         }
+
+
+class ContentLanguageIndexer(BaseIndexer):
+    """Indexer in charge of:
+    - filtering out content already indexed
+    - reading content from objstorage per the content's id (sha1)
+    - computing the language from that content
+    - storing the result in storage
+
+    """
+    ADDITIONAL_CONFIG = {
+        'workdir': ('str', '/tmp/swh/worker.file.properties'),
+    }
+
+    def __init__(self):
+        super().__init__()
+        self.working_directory = self.config['workdir']
+
+    def filter_contents(self, sha1s):
+        """Filter out known sha1s and return only missing ones.
+
+        """
+        yield from self.storage.content_language_missing(sha1s)
+
+    def index_content(self, sha1, raw_content):
+        """Index the content identified by sha1 and return the result.
+
+        Args:
+            sha1 (bytes): content's identifier
+            raw_content (bytes): raw content in bytes
+
+        Returns:
+            A dict, representing a content_language, with keys:
+              - id (bytes): content's identifier (sha1)
+              - lang (str): detected language, None if undetected
+
+        """
+        result = compute_language(raw_content)
+        result.update({
+            'id': sha1,
+        })
+
+        return result
+
+    def persist_index_computations(self, results):
+        """Persist the results in storage.
+
+        Args:
+
+            results ([dict]): list of content_language, dict with the
+            following keys:
+              - id (bytes): content's identifier (sha1)
+              - lang (str): detected language, None if undetected
+
+        """
+        self.storage.content_language_add(results)
diff --git a/swh/indexer/tasks.py b/swh/indexer/tasks.py
index a487ad8..957a857 100644
--- a/swh/indexer/tasks.py
+++ b/swh/indexer/tasks.py
@@ -1,30 +1,41 @@
 # Copyright (C) 2016  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from swh.scheduler.task import Task
 
 from .orchestrator import OrchestratorIndexer
 from .file_properties import ContentMimetypeIndexer
+from .language import ContentLanguageIndexer
 
 
 class SWHOrchestratorTask(Task):
     """Main task in charge of reading messages and broadcasting them back
     to other tasks.
 
     """
     task_queue = 'swh_indexer_orchestrator'
 
     def run(self, *args, **kwargs):
         OrchestratorIndexer().run(*args, **kwargs)
 
 
 class SWHContentMimetypeTask(Task):
     """Task which computes the mimetype, encoding from the sha1's content.
 
     """
     task_queue = 'swh_indexer_content_mimetype'
 
     def run(self, *args, **kwargs):
         ContentMimetypeIndexer().run(*args, **kwargs)
+
+
+class SWHContentLanguageTask(Task):
+    """Task which computes the language from the sha1's content.
+
+    """
+    task_queue = 'swh_indexer_content_language'
+
+    def run(self, *args, **kwargs):
+        ContentLanguageIndexer().run(*args, **kwargs)