# Copyright (C) 2015-2016  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import click
import subprocess
import json

from swh.core import hashutil

from .language import compute_language
from .indexer import BaseIndexer, DiskIndexer


# Options used to compute tags
__FLAGS = [
    '--fields=+lnz',  # +l: language
                      # +n: line number of tag definition
                      # +z: include the symbol's kind (function, variable, ...)
    '--sort=no',      # do NOT sort the output on tag name
    '--links=no',     # do not follow symlinks
    '--output-format=json',  # outputs in json
]


def run_ctags(path, lang=None, ctags_command='ctags'):
    """Run ctags on file path with optional language.

    Args:
        path: path to the file
        lang: language for that path (optional); when given, passed to
            ctags through --language-force
        ctags_command: the ctags executable to invoke (defaults to
            'ctags' resolved on the PATH)

    Yields:
        One dict per symbol emitted by ctags, with keys:
        - name (str): symbol name
        - kind (str): symbol's kind (function, variable, ...)
        - line (int): line number of the tag definition
        - lang (str): language detected by ctags

    Raises:
        subprocess.CalledProcessError: if ctags exits with a non-zero
            status.

    """
    optional = []
    if lang:
        optional = ['--language-force=%s' % lang]

    cmd = [ctags_command] + __FLAGS + optional + [path]
    output = subprocess.check_output(cmd, universal_newlines=True)

    # --output-format=json emits one json object per line
    for symbol in output.split('\n'):
        if not symbol:
            continue
        js_symbol = json.loads(symbol)
        yield {
            'name': js_symbol['name'],
            'kind': js_symbol['kind'],
            'line': js_symbol['line'],
            'lang': js_symbol['language'],
        }


class CtagsIndexer(BaseIndexer, DiskIndexer):
    """Indexer in charge of:
    - filtering out content already indexed
    - computing ctags symbols from contents written to disk
    - storing the results

    """
    CONFIG_BASE_FILENAME = 'indexer/ctags'

    ADDITIONAL_CONFIG = {
        'workdir': ('str', '/tmp/swh/indexer.ctags'),
        # Tool identity (name/version) is persisted alongside results so
        # that recomputation with a newer tool is possible.
        'tool': ('dict', {
            'name': 'universal-ctags',
            'version': '~git7859817b',
            'command': '/usr/bin/ctags',
        }),
        # Map from swh-detected language to the name ctags expects for
        # --language-force; None means "no ctags support for it".
        'languages': ('dict', {
            'ada': 'Ada',
            'adl': None,
            'agda': None,
            # ...
        })
    }

    def __init__(self):
        super().__init__()
        self.working_directory = self.config['workdir']
        self.language_map = self.config['languages']
        self.ctags_command = self.config['tool']['command']
        self.tool_name = self.config['tool']['name']
        self.tool_version = self.config['tool']['version']

    def filter_contents(self, sha1s):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_ctags_missing((
            {
                'id': sha1,
                'tool_name': self.tool_name,
                'tool_version': self.tool_version
            } for sha1 in sha1s
        ))

    def index_content(self, sha1, raw_content):
        """Index sha1s' content and store result.

        Args:
            sha1 (bytes): content's identifier
            raw_content (bytes): raw content in bytes

        Returns:
            A dict, representing a content_ctags, with keys:
            - id (bytes): content's identifier (sha1)
            - ctags ([dict]): ctags list of symbols
            or None when no (supported) language could be detected.

        """
        lang = compute_language(raw_content)['lang']

        if not lang:
            return None

        # ctags needs its own language name; unsupported languages map
        # to None and are skipped.
        ctags_lang = self.language_map.get(lang)

        if not ctags_lang:
            return None

        ctags = {
            'id': sha1,
        }

        filename = hashutil.hash_to_hex(sha1)
        content_path = self.write_to_temp(
            filename=filename,
            data=raw_content)

        result = run_ctags(content_path, lang=ctags_lang,
                           ctags_command=self.ctags_command)
        ctags.update({
            'ctags': list(result),
            'tool_name': self.tool_name,
            'tool_version': self.tool_version,
        })

        self.cleanup(content_path)

        return ctags

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of content_ctags, dict with the
            following keys:
            - id (bytes): content's identifier (sha1)
            - ctags ([dict]): ctags list of symbols
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        self.storage.content_ctags_add(
            results, conflict_update=(policy_update == 'update-dups'))


@click.command()
@click.option('--path', help="Path to execute index on")
def main(path):
    r = list(run_ctags(path))
    print(r)


if __name__ == '__main__':
    main()
# Copyright (C) 2016  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import click
import subprocess

from swh.core import hashutil

from .indexer import BaseIndexer, DiskIndexer


def compute_license(tool, path):
    """Determine license from file at path.

    Args:
        tool: path to the license-detection tool (nomossa) to execute
        path: filepath to determine the license

    Returns:
        A dict with the following keys:
        - licenses ([str]): associated detected licenses to path
        - path (bytes): content filepath

        When the tool produces no output, 'licenses' is an empty list.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero
            status.

    """
    properties = subprocess.check_output([tool, path],
                                         universal_newlines=True)
    if properties:
        # nomossa's output looks like:
        #   "File <name> contains license(s) <l1>,<l2>"
        res = properties.rstrip().split(' contains license(s) ')
        licenses = res[1].split(',')

        return {
            'licenses': licenses,
            'path': path,
        }

    # Always return a dict so callers can safely .update() the result
    # (the previous implicit None crashed index_content).
    return {
        'licenses': [],
        'path': path,
    }


class ContentFossologyLicenseIndexer(BaseIndexer, DiskIndexer):
    """Indexer in charge of:
    - filtering out content already indexed
    - reading content from objstorage per the content's id (sha1)
    - computing the licenses from that content
    - storing the result in storage

    """
    ADDITIONAL_CONFIG = {
        'workdir': ('str', '/tmp/swh/indexer.fossology.license'),
        # Tool identity (name/version) is persisted alongside results.
        'tool': ('dict', {
            'name': 'nomos',
            'version': '3.1.0rc2-31-ga2cbb8c',
            'command': '/usr/bin/nomossa',
        }),
    }

    CONFIG_BASE_FILENAME = 'indexer/fossology_license'

    def __init__(self):
        super().__init__()
        self.working_directory = self.config['workdir']
        self.tool = self.config['tool']['command']
        self.tool_name = self.config['tool']['name']
        self.tool_version = self.config['tool']['version']

    def filter_contents(self, sha1s):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_fossology_license_missing((
            {
                'id': sha1,
                'tool_name': self.tool_name,
                'tool_version': self.tool_version
            } for sha1 in sha1s
        ))

    def index_content(self, sha1, content):
        """Index sha1s' content and store result.

        Args:
            sha1 (bytes): content's identifier
            content (bytes): raw content in bytes

        Returns:
            A dict, representing a content_license, with keys:
            - id (bytes): content's identifier (sha1)
            - licenses ([str]): detected licenses
            - path (bytes): path
            - tool_name (str), tool_version (str): tool identity

        """
        filename = hashutil.hash_to_hex(sha1)
        content_path = self.write_to_temp(
            filename=filename,
            data=content)

        properties = compute_license(self.tool, path=content_path)
        properties.update({
            'id': sha1,
            'tool_name': self.tool_name,
            'tool_version': self.tool_version,
        })

        self.cleanup(content_path)

        return properties

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of content_license, dict with the
            following keys:
            - id (bytes): content's identifier (sha1)
            - license (bytes): license in bytes
            - path (bytes): path
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        wrong_licenses = self.storage.content_fossology_license_add(
            results,
            conflict_update=(policy_update == 'update-dups'))

        if wrong_licenses:
            for l in wrong_licenses:
                self.log.warn('Content %s has some unknown licenses: %s' % (
                    hashutil.hash_to_hex(l['id']),
                    ','.join((name for name in l['licenses'])))
                )


@click.command(help='Compute license for path using tool')
@click.option('--tool', default='nomossa', help="Path to tool")
@click.option('--path', required=True, help="Path to execute index on")
def main(tool, path):
    print(compute_license(tool, path))


if __name__ == '__main__':
    main()
# Copyright (C) 2016  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from pygments.lexers import guess_lexer
from chardet import detect

from .indexer import BaseIndexer


def _cleanup_classname(classname):
    """Normalize a pygments lexer name into a language identifier
    (lowercase, spaces replaced by dashes).

    """
    return classname.lower().replace(' ', '-')


def compute_language(raw_content):
    """Determine the raw content's language.

    Args:
        raw_content (bytes): content to determine raw content

    Returns:
        Dict with keys:
        - lang: None if nothing found or the possible language

    """
    try:
        stats = detect(raw_content)
        encoding = stats['encoding']
        content = raw_content.decode(encoding)
        lang = _cleanup_classname(
            guess_lexer(content).name)

        return {
            'lang': lang
        }
    except Exception:
        # Best effort: any failure (undetectable encoding, decoding
        # error, lexer guess failure) yields "no language" rather than
        # aborting the indexing run.
        return {
            'lang': None
        }


class ContentLanguageIndexer(BaseIndexer):
    """Indexer in charge of:
    - filtering out content already indexed
    - reading content from objstorage per the content's id (sha1)
    - computing the language from that content
    - storing the result in storage

    """
    CONFIG_BASE_FILENAME = 'indexer/language'

    # Tool identity (name/version) is persisted alongside results.
    ADDITIONAL_CONFIG = {
        'tool': ('dict', {
            'name': 'pygments',
            'version': '2.0.1+dfsg-1.1+deb8u1',
        }),
    }

    def __init__(self):
        super().__init__()
        self.tool_name = self.config['tool']['name']
        self.tool_version = self.config['tool']['version']

    def filter_contents(self, sha1s):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_language_missing((
            {
                'id': sha1,
                'tool_name': self.tool_name,
                'tool_version': self.tool_version
            } for sha1 in sha1s
        ))

    def index_content(self, sha1, raw_content):
        """Index sha1s' content and store result.

        Args:
            sha1 (bytes): content's identifier
            raw_content (bytes): raw content in bytes

        Returns:
            A dict, representing a content_language, with keys:
            - id (bytes): content's identifier (sha1)
            - lang (bytes): detected language
            - tool_name (str), tool_version (str): tool identity

        """
        result = compute_language(raw_content)
        result.update({
            'id': sha1,
            'tool_name': self.tool_name,
            'tool_version': self.tool_version,
        })

        return result

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of content_language, dict with the
            following keys:
            - id (bytes): content's identifier (sha1)
            - lang (bytes): detected language
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        self.storage.content_language_add(
            results, conflict_update=(policy_update == 'update-dups'))
# Copyright (C) 2016  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import click
import subprocess

from swh.core import hashutil
from swh.scheduler.celery_backend.config import app

from .indexer import BaseIndexer, DiskIndexer


def compute_mimetype_encoding(path):
    """Determine mimetype and encoding from file at path.

    Args:
        path: filepath to determine the mime type

    Returns:
        A dict with 'mimetype' and 'encoding' keys (both bytes).  When
        the `file` command output cannot be parsed, falls back to
        b'application/octet-stream' / b'binary'.

    Raises:
        subprocess.CalledProcessError: if `file` exits with a non-zero
            status.

    """
    cmd = ['file', '--mime', path]
    properties = subprocess.check_output(cmd)
    if properties:
        # `file --mime` output: b"<path>: <mimetype>; charset=<encoding>"
        res = properties.split(b': ')[1].strip().split(b'; ')
        mimetype = res[0]
        encoding = res[1].split(b'=')[1]
        return {
            'mimetype': mimetype,
            'encoding': encoding
        }

    # Always return a dict so callers can safely .update() the result
    # (the previous implicit None crashed index_content).
    return {
        'mimetype': b'application/octet-stream',
        'encoding': b'binary'
    }


class ContentMimetypeIndexer(BaseIndexer, DiskIndexer):
    """Indexer in charge of:
    - filtering out content already indexed
    - reading content from objstorage per the content's id (sha1)
    - computing {mimetype, encoding} from that content
    - storing the result in storage

    """
    ADDITIONAL_CONFIG = {
        'workdir': ('str', '/tmp/swh/indexer.mimetype'),
        # Celery task that receives the ids of text contents (next_step)
        'destination_queue': (
            'str', 'swh.indexer.tasks.SWHOrchestratorTextContentsTask'),
        # Tool identity (name/version) is persisted alongside results.
        'tool': ('dict', {
            'name': 'file',
            'version': '5.22'
        }),
    }

    CONFIG_BASE_FILENAME = 'indexer/mimetype'

    def __init__(self):
        super().__init__()
        self.working_directory = self.config['workdir']
        destination_queue = self.config['destination_queue']
        self.task_destination = app.tasks[destination_queue]
        self.tool_name = self.config['tool']['name']
        self.tool_version = self.config['tool']['version']

    def filter_contents(self, sha1s):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_mimetype_missing((
            {
                'id': sha1,
                'tool_name': self.tool_name,
                'tool_version': self.tool_version
            } for sha1 in sha1s
        ))

    def index_content(self, sha1, content):
        """Index sha1s' content and store result.

        Args:
            sha1 (bytes): content's identifier
            content (bytes): raw content in bytes

        Returns:
            A dict, representing a content_mimetype, with keys:
            - id (bytes): content's identifier (sha1)
            - mimetype (bytes): mimetype in bytes
            - encoding (bytes): encoding in bytes
            - tool_name (str), tool_version (str): tool identity

        """
        filename = hashutil.hash_to_hex(sha1)
        content_path = self.write_to_temp(
            filename=filename,
            data=content)

        properties = compute_mimetype_encoding(content_path)
        properties.update({
            'id': sha1,
            'tool_name': self.tool_name,
            'tool_version': self.tool_version,
        })

        self.cleanup(content_path)

        return properties

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of content_mimetype, dict with the
            following keys:
            - id (bytes): content's identifier (sha1)
            - mimetype (bytes): mimetype in bytes
            - encoding (bytes): encoding in bytes
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        self.storage.content_mimetype_add(
            results, conflict_update=(policy_update == 'update-dups'))

    def _filter_text(self, results):
        """Yield the ids of results whose raw content is text (i.e. whose
        encoding is not binary).

        """
        for result in results:
            if b'binary' in result['encoding']:
                continue
            yield result['id']

    def next_step(self, results):
        """When the computations is done, we'd like to send over only text
        contents to the text content orchestrator.

        Args:
            results ([dict]): List of content_mimetype results, dict
            with the following keys:
            - id (bytes): content's identifier (sha1)
            - mimetype (bytes): mimetype in bytes
            - encoding (bytes): encoding in bytes

        """
        self.task_destination.delay(list(self._filter_text(results)))


@click.command()
@click.option('--path', help="Path to execute index on")
def main(path):
    print(compute_mimetype_encoding(path))


if __name__ == '__main__':
    main()