diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py
index aa118b3..bf5bcba 100644
--- a/swh/indexer/ctags.py
+++ b/swh/indexer/ctags.py
@@ -1,160 +1,160 @@
# Copyright (C) 2015-2017  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import click
import subprocess
import json

from swh.model import hashutil

from .language import compute_language
from .indexer import ContentIndexer, DiskIndexer


# Options used to compute tags
__FLAGS = [
    '--fields=+lnz',  # +l: language
                      # +n: line number of tag definition
                      # +z: include the symbol's kind (function, variable, ...)
-    '--sort=no',             # sort output on tag name
+    '--sort=no',             # do not sort the tag output
    '--links=no',            # do not follow symlinks
    '--output-format=json',  # outputs in json
]


def run_ctags(path, lang=None, ctags_command='ctags'):
    """Run ctags on file path with optional language.

    Args:
        path: path to the file
        lang: language for that path (optional)

    Returns:
        ctags' output

    """
    optional = []
    if lang:
        optional = ['--language-force=%s' % lang]

    cmd = [ctags_command] + __FLAGS + optional + [path]
    output = subprocess.check_output(cmd, universal_newlines=True)

    for symbol in output.split('\n'):
        if not symbol:
            continue
        js_symbol = json.loads(symbol)
        yield {
            'name': js_symbol['name'],
            'kind': js_symbol['kind'],
            'line': js_symbol['line'],
            'lang': js_symbol['language'],
        }


class CtagsIndexer(ContentIndexer, DiskIndexer):
    CONFIG_BASE_FILENAME = 'indexer/ctags'

    ADDITIONAL_CONFIG = {
        'workdir': ('str', '/tmp/swh/indexer.ctags'),
        'tools': ('dict', {
            'name': 'universal-ctags',
            'version': '~git7859817b',
            'configuration': {
                'command_line': '''ctags --fields=+lnz --sort=no --links=no '''
                                '''--output-format=json '''
            },
        }),
        'languages': ('dict', {
            'ada': 'Ada',
            'adl': None,
            'agda': None,
            # ...
        })
    }

    def prepare(self):
        super().prepare()
        self.working_directory = self.config['workdir']
        self.language_map = self.config['languages']

-    def filter(self, sha1s):
+    def filter(self, ids):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_ctags_missing((
            {
                'id': sha1,
                'indexer_configuration_id': self.tools['id'],
-            } for sha1 in sha1s
+            } for sha1 in ids
        ))

-    def index(self, sha1, raw_content):
+    def index(self, id, data):
        """Index sha1s' content and store result.

        Args:
-            sha1 (bytes): content's identifier
-            raw_content (bytes): raw content in bytes
+            id (bytes): content's identifier
+            data (bytes): raw content in bytes

        Returns:
-            A dict, representing a content_mimetype, with keys:
+            A dict, representing a content_ctags, with keys:
              - id (bytes): content's identifier (sha1)
              - ctags ([dict]): ctags list of symbols

        """
-        lang = compute_language(raw_content, log=self.log)['lang']
+        lang = compute_language(data, log=self.log)['lang']

        if not lang:
            return None

        ctags_lang = self.language_map.get(lang)

        if not ctags_lang:
            return None

        ctags = {
-            'id': sha1,
+            'id': id,
        }

-        filename = hashutil.hash_to_hex(sha1)
+        filename = hashutil.hash_to_hex(id)
        content_path = self.write_to_temp(
            filename=filename,
-            data=raw_content)
+            data=data)

        result = run_ctags(content_path, lang=ctags_lang)
        ctags.update({
            'ctags': list(result),
            'indexer_configuration_id': self.tools['id'],
        })
        self.cleanup(content_path)

        return ctags

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
-            results ([dict]): list of content_mimetype, dict with the
+            results ([dict]): list of content_ctags, dict with the
            following keys:
              - id (bytes): content's identifier (sha1)
              - ctags ([dict]): ctags list of symbols
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        self.storage.content_ctags_add(
            results, conflict_update=(policy_update == 'update-dups'))


@click.command()
@click.option('--path', help="Path to execute index on")
def main(path):
    r = list(run_ctags(path))
    print(r)


if __name__ == '__main__':
    main()
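# Editor's note, kept outside the patch: a minimal usage sketch for the
# run_ctags() generator touched above. It assumes a universal-ctags binary
# with JSON output support ('--output-format=json') is installed and on PATH;
# the sample file path and language are hypothetical.
from swh.indexer.ctags import run_ctags

# run_ctags is lazy: each JSON line of ctags output is parsed on demand.
for symbol in run_ctags('/tmp/example.py', lang='Python'):
    # each symbol is a dict with 'name', 'kind', 'line' and 'lang' keys
    print('%(lang)s:%(line)s %(kind)s %(name)s' % symbol)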
diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py
index ca79dab..687ca4f 100644
--- a/swh/indexer/fossology_license.py
+++ b/swh/indexer/fossology_license.py
@@ -1,140 +1,140 @@
# Copyright (C) 2016-2017  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import click
import subprocess

from swh.model import hashutil

from .indexer import ContentIndexer, DiskIndexer


def compute_license(path, log=None):
    """Determine license from file at path.

    Args:
        path: filepath to determine the license

    Returns:
        A dict with the following keys:
        - licenses ([str]): associated detected licenses to path
        - path (bytes): content filepath
        - tool (str): tool used to compute the output

    """
    try:
        properties = subprocess.check_output(['nomossa', path],
                                             universal_newlines=True)
        if properties:
            res = properties.rstrip().split(' contains license(s) ')
            licenses = res[1].split(',')

            return {
                'licenses': licenses,
                'path': path,
            }
    except subprocess.CalledProcessError:
        if log:
            from os import path as __path
            log.exception('Problem during license detection for sha1 %s' %
                          __path.basename(path))
        return {
            'licenses': [],
            'path': path,
        }


class ContentFossologyLicenseIndexer(ContentIndexer, DiskIndexer):
    """Indexer in charge of:
    - filtering out content already indexed
    - reading content from objstorage per the content's id (sha1)
-    - computing {license, encoding} from that content
+    - computing the license from that content
    - store result in storage

    """
    ADDITIONAL_CONFIG = {
        'workdir': ('str', '/tmp/swh/indexer.fossology.license'),
        'tools': ('dict', {
            'name': 'nomos',
            'version': '3.1.0rc2-31-ga2cbb8c',
            'configuration': {
                'command_line': 'nomossa ',
            },
        }),
    }

    CONFIG_BASE_FILENAME = 'indexer/fossology_license'

    def prepare(self):
        super().prepare()
        self.working_directory = self.config['workdir']

-    def filter(self, sha1s):
+    def filter(self, ids):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_fossology_license_missing((
            {
                'id': sha1,
                'indexer_configuration_id': self.tools['id'],
-            } for sha1 in sha1s
+            } for sha1 in ids
        ))

-    def index(self, sha1, raw_content):
+    def index(self, id, data):
        """Index sha1s' content and store result.

        Args:
-            sha1 (bytes): content's identifier
-            raw_content (bytes): raw content in bytes
+            id (bytes): content's identifier
+            data (bytes): raw content in bytes

        Returns:
            A dict, representing a content_license, with keys:
            - id (bytes): content's identifier (sha1)
            - license (bytes): license in bytes
            - path (bytes): path

        """
-        filename = hashutil.hash_to_hex(sha1)
+        filename = hashutil.hash_to_hex(id)
        content_path = self.write_to_temp(
            filename=filename,
-            data=raw_content)
+            data=data)

        try:
            properties = compute_license(path=content_path, log=self.log)
            properties.update({
-                'id': sha1,
+                'id': id,
                'indexer_configuration_id': self.tools['id'],
            })
        finally:
            self.cleanup(content_path)

        return properties

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of content_license, dict with the
            following keys:
              - id (bytes): content's identifier (sha1)
              - license (bytes): license in bytes
              - path (bytes): path
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        self.storage.content_fossology_license_add(
            results, conflict_update=(policy_update == 'update-dups'))


@click.command(help='Compute license for path using tool')
@click.option('--tool', default='nomossa', help="Path to tool")
@click.option('--path', required=1, help="Path to execute index on")
def main(tool, path):
-    print(compute_license(tool, path))
+    print(compute_license(path))


if __name__ == '__main__':
    main()
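# Editor's note, kept outside the patch: a hedged usage sketch for
# compute_license() above. It shells out to the 'nomossa' binary, which must
# be installed; the sample file path is hypothetical.
import logging

from swh.indexer.fossology_license import compute_license

result = compute_license('/tmp/example.c', log=logging.getLogger(__name__))
# Expected shape: {'licenses': ['GPL-2.0', ...], 'path': '/tmp/example.c'}
if result:  # compute_license may return None when nomossa produces no output
    print(result['licenses'])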
diff --git a/swh/indexer/language.py b/swh/indexer/language.py
index 18fa13d..38a1931 100644
--- a/swh/indexer/language.py
+++ b/swh/indexer/language.py
@@ -1,207 +1,207 @@
# Copyright (C) 2016-2017  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import io

from pygments.lexers import guess_lexer
from pygments.util import ClassNotFound
from chardet.universaldetector import UniversalDetector

from .indexer import ContentIndexer


def _cleanup_classname(classname):
-    """Determine the language from the pygments' lexer names.
+    """Normalize the pygments lexer's class name (lowercase, dash-separated).

    """
    return classname.lower().replace(' ', '-')


def _read_raw(raw_content, size=2048):
    """Read raw content in chunk.

    """
    bs = io.BytesIO(raw_content)
    while True:
        chunk = bs.read(size)
        if not chunk:
            break
        yield chunk


def _detect_encoding(raw_content):
    """Given a raw content, try and detect its encoding.

    """
    detector = UniversalDetector()
    for chunk in _read_raw(raw_content):
        detector.feed(chunk)
        if detector.done:
            break
    detector.close()
    return detector.result['encoding']


def compute_language_from_chunk(encoding, length, raw_content, max_size,
                                log=None):
    """Determine the raw content's language.

    Args:
        encoding (str): Encoding to use to decode the content
        length (int): raw_content's length
        raw_content (bytes): raw content to work with
        max_size (int): max size to split the raw content at

    Returns:
        Dict with keys:
        - lang: None if nothing found or the possible language

    """
    try:
        if max_size <= length:
            raw_content = raw_content[0:max_size]

        content = raw_content.decode(encoding)
        lang = _cleanup_classname(
            guess_lexer(content).name)
    except ClassNotFound:
        lang = None
    except UnicodeDecodeError:
        raise
    except Exception:
        if log:
            log.exception('Problem during language detection, skipping')
        lang = None

    return {
        'lang': lang
    }


def compute_language(raw_content, encoding=None, log=None):
    """Determine the raw content's language.

    Args:
        raw_content (bytes): raw content to work with

    Returns:
        Dict with keys:
        - lang: None if nothing found or the possible language

    """
    try:
        encoding = _detect_encoding(raw_content)
        content = raw_content.decode(encoding)
        lang = _cleanup_classname(
            guess_lexer(content).name)
    except ClassNotFound:
        lang = None
    except Exception:
        if log:
            log.exception('Problem during language detection, skipping')
        lang = None

    return {
        'lang': lang
    }
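# Editor's note, kept outside the patch: a hedged sketch of the
# language-detection helper above; pygments and chardet must be installed
# (they are imports of this module). The detected name depends on the local
# pygments version, hence the "e.g.".
from swh.indexer.language import compute_language

raw = b'#!/usr/bin/env python3\nprint("hello")\n'
print(compute_language(raw))  # e.g. {'lang': 'python'} if pygments matches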
class ContentLanguageIndexer(ContentIndexer):
    """Indexer in charge of:
    - filtering out content already indexed
    - reading content from objstorage per the content's id (sha1)
-    - computing {mimetype, encoding} from that content
+    - computing the language from that content
    - store result in storage

    """
    CONFIG_BASE_FILENAME = 'indexer/language'

    ADDITIONAL_CONFIG = {
        'tools': ('dict', {
            'name': 'pygments',
            'version': '2.0.1+dfsg-1.1+deb8u1',
            'configuration': {
                'type': 'library',
                'debian-package': 'python3-pygments',
                'max_content_size': 10240,
            },
        }),
    }

    def prepare(self):
        super().prepare()
        c = self.config
        self.max_content_size = c['tools']['configuration']['max_content_size']

-    def filter(self, sha1s):
+    def filter(self, ids):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_language_missing((
            {
                'id': sha1,
                'indexer_configuration_id': self.tools['id'],
-            } for sha1 in sha1s
+            } for sha1 in ids
        ))

-    def index(self, sha1, raw_content):
+    def index(self, id, data):
        """Index sha1s' content and store result.

        Args:
-            sha1 (bytes): content's identifier
-            raw_content (bytes): raw content in bytes
+            id (bytes): content's identifier
+            data (bytes): raw content in bytes

        Returns:
-            A dict, representing a content_mimetype, with keys:
+            A dict, representing a content_language, with keys:
            - id (bytes): content's identifier (sha1)
            - lang (bytes): detected language

        """
        result = {
-            'id': sha1,
+            'id': id,
            'indexer_configuration_id': self.tools['id'],
            'lang': None,
        }
-        encoding = _detect_encoding(raw_content)
+        encoding = _detect_encoding(data)

        if not encoding:
            return result

-        l = len(raw_content)
+        l = len(data)
        for i in range(0, 9):
            max_size = self.max_content_size + i
            try:
                result = compute_language_from_chunk(
-                    encoding, l, raw_content, max_size, log=self.log)
+                    encoding, l, data, max_size, log=self.log)
            except UnicodeDecodeError:
                self.log.warn('Decoding failed on wrong byte chunk at [0-%s]'
                              ', trying again at next ending byte.' % max_size)
                continue

            # we found something, so we return it
            result.update({
-                'id': sha1,
+                'id': id,
                'indexer_configuration_id': self.tools['id'],
            })
            break

        return result

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
-            results ([dict]): list of content_mimetype, dict with the
+            results ([dict]): list of content_language, dict with the
            following keys:
            - id (bytes): content's identifier (sha1)
            - lang (bytes): detected language
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        self.storage.content_language_add(
            results, conflict_update=(policy_update == 'update-dups'))
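# Editor's note, kept outside the patch: why ContentLanguageIndexer.index()
# retries with max_size + i. Truncating a UTF-8 byte string at an arbitrary
# offset can cut a multi-byte character in half, so decode() raises
# UnicodeDecodeError; nudging the cut point forward a few bytes (a UTF-8
# character is at most 4 bytes) reaches a valid boundary again. A standalone
# illustration, independent of the indexer:
raw = 'héllo'.encode('utf-8')  # b'h\xc3\xa9llo', 6 bytes

for extra in range(0, 4):
    try:
        print(raw[:2 + extra].decode('utf-8'))  # 2 bytes cuts 'é' in half
        break
    except UnicodeDecodeError:
        continue  # retry at the next ending byte, as the indexer does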
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
index ee574f1..0629039 100644
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -1,294 +1,294 @@
# Copyright (C) 2017  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import click
import logging

from swh.indexer.indexer import ContentIndexer, RevisionIndexer
from swh.indexer.metadata_dictionary import compute_metadata
from swh.indexer.metadata_detector import detect_metadata
from swh.indexer.metadata_detector import extract_minimal_metadata_dict
from swh.model import hashutil


class ContentMetadataIndexer(ContentIndexer):
    """Content-level indexer

    This indexer is in charge of:
    - filtering out content already indexed in content_metadata
    - reading content from objstorage with the content's id sha1
    - computing translated_metadata by given context
    - using the metadata_dictionary as the 'swh-metadata-translator' tool
    - store result in content_metadata table

    """
    CONFIG_BASE_FILENAME = 'indexer/metadata'

    def __init__(self, tool, config):
        self.tool = tool
        # twisted way to use the exact same config of RevisionMetadataIndexer
        # object that uses internally ContentMetadataIndexer
        self.config = config
        super().__init__()

    def prepare(self):
        self.results = []
        if self.config['storage']:
            self.storage = self.config['storage']
        if self.config['objstorage']:
            self.objstorage = self.config['objstorage']
        l = logging.getLogger('requests.packages.urllib3.connectionpool')
        l.setLevel(logging.WARN)
        self.log = logging.getLogger('swh.indexer')
        self.tools = self.retrieve_tools_information()

    def retrieve_tools_information(self):
        self.config['tools'] = self.tool
        return super().retrieve_tools_information()

-    def filter(self, sha1s):
+    def filter(self, ids):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_metadata_missing((
            {
                'id': sha1,
                'indexer_configuration_id': self.tools['id'],
-            } for sha1 in sha1s
+            } for sha1 in ids
        ))

-    def index(self, sha1, raw_content):
+    def index(self, id, data):
        """Index sha1s' content and store result.

        Args:
-            sha1 (bytes): content's identifier
-            raw_content (bytes): raw content in bytes
+            id (bytes): content's identifier
+            data (bytes): raw content in bytes

        Returns:
            dict: dictionary representing a content_metadata. If the
            translation wasn't successful the translated_metadata keys
            will be returned as None

        """
        result = {
-            'id': sha1,
+            'id': id,
            'indexer_configuration_id': self.tools['id'],
            'translated_metadata': None
        }
        try:
            context = self.tools['tool_configuration']['context']
-            result['translated_metadata'] = compute_metadata(
-                context, raw_content)
+            result['translated_metadata'] = compute_metadata(context, data)
            # a twisted way to keep result with indexer object for get_results
            self.results.append(result)
-        except:
+        except Exception:
            self.log.exception(
                "Problem during tool retrieval of metadata translation")
        return result

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of content_metadata, dict with the
            following keys:
            - id (bytes): content's identifier (sha1)
            - translated_metadata (jsonb): detected metadata
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        self.storage.content_metadata_add(
            results, conflict_update=(policy_update == 'update-dups'))

    def get_results(self):
-        """
-        can be called only if run method was called before
+        """can be called only if run method was called before

        Returns:
-            list: list of content_metadata entries calculated by current indxer
+            list: list of content_metadata entries calculated by
+            current indexer
+
        """
        return self.results
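# Editor's note, kept outside the patch: a hedged sketch of how
# ContentMetadataIndexer is meant to be driven (the same pattern appears in
# RevisionMetadataIndexer.translate_revision_metadata below). The storage and
# objstorage handles and the sha1 list are illustrative placeholders, not a
# documented public API.
from swh.indexer.metadata import ContentMetadataIndexer

tool = {
    'name': 'swh-metadata-translator',
    'version': '0.0.1',
    'configuration': {'type': 'local', 'context': 'npm'},
}
my_storage = ...     # an swh storage client, obtained elsewhere (assumption)
my_objstorage = ...  # an swh objstorage client, obtained elsewhere (assumption)
config = {'storage': my_storage, 'objstorage': my_objstorage}

sha1s = [bytes.fromhex('34973274ccef6ab4dfaaf86599792fa9c3fe4689')]  # example ids
indexer = ContentMetadataIndexer(tool, config)
indexer.run(sha1s, policy_update='ignore-dups')
for entry in indexer.get_results():  # accumulated by index() as a side effect
    print(entry['id'], entry['translated_metadata'])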
class RevisionMetadataIndexer(RevisionIndexer):
    """Revision-level indexer

    This indexer is in charge of:
    - filtering revisions already indexed in revision_metadata table with
      defined computation tool
    - retrieve all entry_files in root directory
-    - use metadata_detector for file_names containig metadata
+    - use metadata_detector for file_names containing metadata
    - compute metadata translation if necessary and possible (depends on tool)
    - send sha1s to content indexing if possible
    - store the results for revision

    """
    CONFIG_BASE_FILENAME = 'indexer/metadata'

    ADDITIONAL_CONFIG = {
        'tools': ('dict', {
            'name': 'swh-metadata-detector',
            'version': '0.0.1',
            'configuration': {
                'type': 'local',
                'context': ['npm', 'codemeta']
            },
        }),
    }

    def prepare(self):
        super().prepare()

    def filter(self, sha1_gits):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.revision_metadata_missing((
            {
                'id': sha1_git,
                'indexer_configuration_id': self.tools['id'],
            } for sha1_git in sha1_gits
        ))

    def index(self, rev):
        """Index rev by processing it and organizing result.

        use metadata_detector to iterate on filenames
        - if one filename detected -> sends file to content indexer
        - if multiple file detected -> translation needed at revision level

        Args:
            rev (bytes): revision artifact from storage

        Returns:
            dict: dictionary representing a revision_metadata, with keys:
            - id (bytes): rev's identifier (sha1_git)
            - indexer_configuration_id (bytes): tool used
            - translated_metadata (bytes): dict of retrieved metadata

        """
        try:
            result = {
                'id': rev['id'],
                'indexer_configuration_id': self.tools['id'],
                'translated_metadata': None
            }

            root_dir = rev['directory']
            dir_ls = self.storage.directory_ls(root_dir, recursive=False)
            files = (entry for entry in dir_ls if entry['type'] == 'file')
            detected_files = detect_metadata(files)
            result['translated_metadata'] = self.translate_revision_metadata(
                detected_files)
-        except Exception as e:
+        except Exception:
            self.log.exception(
                'Problem when indexing rev')
        return result

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
-            results ([dict]): list of content_mimetype, dict with the
-            following keys:
-            - id (bytes): content's identifier (sha1)
-            - mimetype (bytes): mimetype in bytes
-            - encoding (bytes): encoding in bytes
+            results ([dict]): list of revision_metadata, dict with the
+            following keys:
+            - id (bytes): revision's identifier (sha1_git)
+            - translated_metadata (jsonb): detected metadata
            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
            respectively update duplicates or ignore them

        """
        # TODO: add functions in storage to keep data in revision_metadata
        self.storage.revision_metadata_add(
            results, conflict_update=(policy_update == 'update-dups'))
    def translate_revision_metadata(self, detected_files):
        """
-        Determine plan of action to translate metadata when containing
-        one or multiple detected files:
+        Determine the plan of action for translating metadata, depending on
+        whether one or multiple metadata files were detected:

        Args:
            detected_files (dict): dictionary mapping context names (e.g.,
                "npm", "authors") to list of sha1

        Returns:
            dict: dict with translated metadata according to the CodeMeta
            vocabulary

        """
        translated_metadata = []
        tool = {
            'name': 'swh-metadata-translator',
            'version': '0.0.1',
            'configuration': {
                'type': 'local',
                'context': None
            },
        }
        # TODO: iterate on each context, on each file
        # -> get raw_contents
        # -> translate each content
        config = {
            'storage': self.storage,
            'objstorage': self.objstorage
        }
        for context in detected_files.keys():
            tool['configuration']['context'] = context
            c_metadata_indexer = ContentMetadataIndexer(tool, config)
            # sha1s that are in content_metadata table
            sha1s_in_storage = []
            metadata_generator = self.storage.content_metadata_get(
                detected_files[context])
            for c in metadata_generator:
                # extracting translated_metadata
                sha1 = c['id']
                sha1s_in_storage.append(sha1)
                local_metadata = c['translated_metadata']
                # local metadata is aggregated
                if local_metadata:
                    translated_metadata.append(local_metadata)

            sha1s_filtered = [item for item in detected_files[context]
                              if item not in sha1s_in_storage]

            if sha1s_filtered:
                # schedule indexation of content
                try:
                    c_metadata_indexer.run(sha1s_filtered,
                                           policy_update='ignore-dups')
                    # on the fly possibility:
                    results = c_metadata_indexer.get_results()

                    for result in results:
                        local_metadata = result['translated_metadata']
                        translated_metadata.append(local_metadata)

                except Exception as e:
-                    self.log.warn("""Exception while indexing content""", e)
+                    self.log.warn('Exception while indexing content: %s', e)

        # transform translated_metadata into min set with swh-metadata-detector
        min_metadata = extract_minimal_metadata_dict(translated_metadata)
        return min_metadata


@click.command()
@click.option('--revs', '-i',
              default=['8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
                       '026040ea79dec1b49b4e3e7beda9132b6b26b51b',
                       '9699072e21eded4be8d45e3b8d543952533fa190'],
              help='Default sha1_git to lookup', multiple=True)
def main(revs):
    _git_sha1s = list(map(hashutil.hash_to_bytes, revs))
    rev_metadata_indexer = RevisionMetadataIndexer()
    rev_metadata_indexer.run(_git_sha1s, 'update-dups')


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()
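# Editor's note, kept outside the patch: a hedged sketch of the data shape
# translate_revision_metadata() expects, per its docstring: a dict mapping a
# metadata context (the configured ones are 'npm' and 'codemeta') to the sha1
# identifiers of the files detected for it. The id below is an illustrative
# placeholder, not real data.
detected_files = {
    'npm': [
        bytes.fromhex('34973274ccef6ab4dfaaf86599792fa9c3fe4689'),  # e.g. a package.json blob
    ],
    'codemeta': [],
}
for context, sha1s in detected_files.items():
    print(context, len(sha1s), 'file(s) to translate')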
""" if raw_content is b'': return { 'mimetype': b'application/x-empty', 'encoding': b'binary' } with Popen(['file', '--mime', '-'], stdin=PIPE, stdout=PIPE, stderr=PIPE) as p: properties, _ = p.communicate(raw_content) if properties: res = properties.split(b': ')[1].strip().split(b'; ') mimetype = res[0] encoding = res[1].split(b'=')[1] return { 'mimetype': mimetype, 'encoding': encoding } class ContentMimetypeIndexer(ContentIndexer): """Indexer in charge of: - filtering out content already indexed - reading content from objstorage per the content's id (sha1) - computing {mimetype, encoding} from that content - store result in storage """ ADDITIONAL_CONFIG = { # chained queue message, e.g: # swh.indexer.tasks.SWHOrchestratorTextContentsTask 'destination_queue': ('str', None), 'tools': ('dict', { 'name': 'file', 'version': '5.22', 'configuration': { 'command_line': 'file --mime ', }, }), } CONFIG_BASE_FILENAME = 'indexer/mimetype' def prepare(self): super().prepare() destination_queue = self.config.get('destination_queue') if destination_queue: self.task_destination = utils.get_task(destination_queue) else: self.task_destination = None self.tools = self.retrieve_tools_information() - def filter(self, sha1s): + def filter(self, ids): """Filter out known sha1s and return only missing ones. """ yield from self.storage.content_mimetype_missing(( { 'id': sha1, 'indexer_configuration_id': self.tools['id'], - } for sha1 in sha1s + } for sha1 in ids )) - def index(self, sha1, raw_content): + def index(self, id, data): """Index sha1s' content and store result. Args: - sha1 (bytes): content's identifier - raw_content (bytes): raw content in bytes + id (bytes): content's identifier + data (bytes): raw content in bytes Returns: A dict, representing a content_mimetype, with keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes """ - properties = compute_mimetype_encoding(raw_content) + properties = compute_mimetype_encoding(data) properties.update({ - 'id': sha1, + 'id': id, 'indexer_configuration_id': self.tools['id'], }) return properties def persist_index_computations(self, results, policy_update): """Persist the results in storage. Args: results ([dict]): list of content_mimetype, dict with the following keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ self.storage.content_mimetype_add( results, conflict_update=(policy_update == 'update-dups')) def _filter_text(self, results): """Filter sha1 whose raw content is text. """ for result in results: if b'binary' in result['encoding']: continue yield result['id'] def next_step(self, results): """When the computations is done, we'd like to send over only text contents to the text content orchestrator. Args: results ([dict]): List of content_mimetype results, dict with the following keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes """ if self.task_destination: self.task_destination.delay(list(self._filter_text(results))) @click.command() @click.option('--path', help="Path to execute index on") def main(path): with open(path, 'rb') as f: raw_content = f.read() print(compute_mimetype_encoding(raw_content)) if __name__ == '__main__': main()