diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py index 492e7c0..1ad6022 100644 --- a/swh/indexer/ctags.py +++ b/swh/indexer/ctags.py @@ -1,155 +1,156 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import subprocess import json from swh.model import hashutil from .language import compute_language from .indexer import ContentIndexer, DiskIndexer # Options used to compute tags __FLAGS = [ '--fields=+lnz', # +l: language # +n: line number of tag definition # +z: include the symbol's kind (function, variable, ...) '--sort=no', # sort output on tag name '--links=no', # do not follow symlinks '--output-format=json', # outputs in json ] def run_ctags(path, lang=None, ctags_command='ctags'): """Run ctags on file path with optional language. Args: path: path to the file lang: language for that path (optional) - Returns: - ctags' output + Yields: + dict: ctags' output """ optional = [] if lang: optional = ['--language-force=%s' % lang] cmd = [ctags_command] + __FLAGS + optional + [path] output = subprocess.check_output(cmd, universal_newlines=True) for symbol in output.split('\n'): if not symbol: continue js_symbol = json.loads(symbol) yield { 'name': js_symbol['name'], 'kind': js_symbol['kind'], 'line': js_symbol['line'], 'lang': js_symbol['language'], } class CtagsIndexer(ContentIndexer, DiskIndexer): CONFIG_BASE_FILENAME = 'indexer/ctags' ADDITIONAL_CONFIG = { 'workdir': ('str', '/tmp/swh/indexer.ctags'), 'tools': ('dict', { 'name': 'universal-ctags', 'version': '~git7859817b', 'configuration': { 'command_line': '''ctags --fields=+lnz --sort=no --links=no ''' '''--output-format=json ''' }, }), 'languages': ('dict', { 'ada': 'Ada', 'adl': None, 'agda': None, # ... }) } def prepare(self): super().prepare() self.working_directory = self.config['workdir'] self.language_map = self.config['languages'] self.tool = self.tools[0] def filter(self, ids): """Filter out known sha1s and return only missing ones. """ yield from self.idx_storage.content_ctags_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) def compute_ctags(self, path, lang): """Compute ctags on file at path with language lang. """ return run_ctags(path, lang=lang) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: - A dict, representing a content_mimetype, with keys: - - id (bytes): content's identifier (sha1) - - ctags ([dict]): ctags list of symbols + dict: a dict representing a content_mimetype with keys: + + - **id** (bytes): content's identifier (sha1) + - **ctags** ([dict]): ctags list of symbols """ lang = compute_language(data, log=self.log)['lang'] if not lang: return None ctags_lang = self.language_map.get(lang) if not ctags_lang: return None ctags = { 'id': id, } filename = hashutil.hash_to_hex(id) content_path = self.write_to_temp( filename=filename, data=data) result = run_ctags(content_path, lang=ctags_lang) ctags.update({ 'ctags': list(result), 'indexer_configuration_id': self.tool['id'], }) self.cleanup(content_path) return ctags def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
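# Illustrative sketch, not part of this patch: consuming run_ctags as defined
# above. The file name is made up and the exact symbols depend on the
# universal-ctags build installed.
#
#   for symbol in run_ctags('/tmp/example.c', lang='C'):
#       # e.g. {'name': 'main', 'kind': 'function', 'line': 3, 'lang': 'C'}
#       print(symbol['name'], symbol['kind'], symbol['line'])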
Args: results ([dict]): list of content_mimetype, dict with the - following keys: + following keys: - id (bytes): content's identifier (sha1) - ctags ([dict]): ctags list of symbols policy_update ([str]): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore them + respectively update duplicates or ignore them """ self.idx_storage.content_ctags_add( results, conflict_update=(policy_update == 'update-dups')) diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py index 58e341f..60b5523 100644 --- a/swh/indexer/fossology_license.py +++ b/swh/indexer/fossology_license.py @@ -1,185 +1,191 @@ # Copyright (C) 2016-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import subprocess from swh.model import hashutil from .indexer import ContentIndexer, ContentRangeIndexer, DiskIndexer def compute_license(path, log=None): """Determine license from file at path. Args: path: filepath to determine the license Returns: - A dict with the following keys: + dict: A dict with the following keys: + - licenses ([str]): associated detected licenses to path - path (bytes): content filepath """ try: properties = subprocess.check_output(['nomossa', path], universal_newlines=True) if properties: res = properties.rstrip().split(' contains license(s) ') licenses = res[1].split(',') else: licenses = [] return { 'licenses': licenses, 'path': path, } except subprocess.CalledProcessError: if log: from os import path as __path log.exception('Problem during license detection for sha1 %s' % __path.basename(path)) return { 'licenses': [], 'path': path, } class MixinFossologyLicenseIndexer: """Mixin fossology license indexer. See :class:`ContentFossologyLicenseIndexer` and :class:`FossologyLicenseRangeIndexer` """ ADDITIONAL_CONFIG = { 'workdir': ('str', '/tmp/swh/indexer.fossology.license'), 'tools': ('dict', { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', 'configuration': { 'command_line': 'nomossa ', }, }), 'write_batch_size': ('int', 1000), } CONFIG_BASE_FILENAME = 'indexer/fossology_license' def prepare(self): super().prepare() self.working_directory = self.config['workdir'] self.tool = self.tools[0] def compute_license(self, path, log=None): """Determine license from file at path. Args: path: filepath to determine the license Returns: - A dict with the following keys: + dict: A dict with the following keys: + - licenses ([str]): associated detected licenses to path - path (bytes): content filepath """ return compute_license(path, log=log) def index(self, id, data): """Index sha1s' content and store result. 
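# Illustrative sketch only, not part of this patch: the shape of
# compute_license's result, given that it splits nomossa's output on
# ' contains license(s) ' and then on ','. Path and license names are
# made up for the example.
#
#   compute_license('/tmp/foo.c')
#   # nomossa printed: "/tmp/foo.c contains license(s) GPL-2.0,MIT"
#   # -> {'licenses': ['GPL-2.0', 'MIT'], 'path': '/tmp/foo.c'}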
Args: id (bytes): content's identifier raw_content (bytes): associated raw content to content id Returns: - A dict, representing a content_license, with keys: - - id (bytes): content's identifier (sha1) - - license (bytes): license in bytes - - path (bytes): path - - indexer_configuration_id (int): tool used to compute the output + dict: A dict, representing a content_license, with keys: + + - id (bytes): content's identifier (sha1) + - license (bytes): license in bytes + - path (bytes): path + - indexer_configuration_id (int): tool used to compute the output """ content_path = self.write_to_temp( filename=hashutil.hash_to_hex(id), # use the id as pathname data=data) try: properties = self.compute_license(path=content_path, log=self.log) properties.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], }) finally: self.cleanup(content_path) return properties def persist_index_computations(self, results, policy_update): """Persist the results in storage. Args: results ([dict]): list of content_license, dict with the - following keys: + following keys: + - id (bytes): content's identifier (sha1) - license (bytes): license in bytes - path (bytes): path + policy_update ([str]): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore them + respectively update duplicates or ignore them """ self.idx_storage.content_fossology_license_add( results, conflict_update=(policy_update == 'update-dups')) class ContentFossologyLicenseIndexer( MixinFossologyLicenseIndexer, DiskIndexer, ContentIndexer): """Indexer in charge of: + - filtering out content already indexed - reading content from objstorage per the content's id (sha1) - computing {license, encoding} from that content - store result in storage """ def filter(self, ids): """Filter out known sha1s and return only missing ones. """ yield from self.idx_storage.content_fossology_license_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) class FossologyLicenseRangeIndexer( MixinFossologyLicenseIndexer, DiskIndexer, ContentRangeIndexer): """FossologyLicense Range Indexer working on range of content identifiers. - It: - filters out the non textual content - (optionally) filters out content already indexed (cf - :func:`indexed_contents_in_range`) + :meth:`.indexed_contents_in_range`) - reads content from objstorage per the content's id (sha1) - computes {mimetype, encoding} from that content - stores result in storage """ def indexed_contents_in_range(self, start, end): """Retrieve indexed content id within range [start, end]. - Args - **start** (bytes): Starting bound from range identifier - **end** (bytes): End range identifier + Args: + start (bytes): Starting bound from range identifier + end (bytes): End range identifier Returns: - a dict with keys: + dict: a dict with keys: + - **ids** [bytes]: iterable of content ids within the range. 
- **next** (Optional[bytes]): The next range of sha1 starts at - this sha1 if any + this sha1 if any """ return self.idx_storage.content_fossology_license_get_range( start, end, self.tool['id']) diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py index 65946b5..8f7df07 100644 --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -1,610 +1,607 @@ # Copyright (C) 2016-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import abc import os import logging import shutil import tempfile import datetime from copy import deepcopy from swh.scheduler import get_scheduler from swh.storage import get_storage from swh.core.config import SWHConfig from swh.objstorage import get_objstorage from swh.objstorage.exc import ObjNotFoundError from swh.indexer.storage import get_indexer_storage, INDEXER_CFG_KEY from swh.model import hashutil from swh.core import utils class DiskIndexer: """Mixin intended to be used with other SomethingIndexer classes. Indexers inheriting from this class are a category of indexers which needs the disk for their computations. Note: This expects `self.working_directory` variable defined at runtime. """ def write_to_temp(self, filename, data): """Write the sha1's content in a temporary file. Args: filename (str): one of sha1's many filenames data (bytes): the sha1's content to write in temporary - file + file Returns: The path to the temporary file created. That file is filled in with the raw content's data. """ os.makedirs(self.working_directory, exist_ok=True) temp_dir = tempfile.mkdtemp(dir=self.working_directory) content_path = os.path.join(temp_dir, filename) with open(content_path, 'wb') as f: f.write(data) return content_path def cleanup(self, content_path): """Remove content_path from working directory. Args: content_path (str): the file to remove """ temp_dir = os.path.dirname(content_path) shutil.rmtree(temp_dir) class BaseIndexer(SWHConfig, metaclass=abc.ABCMeta): """Base class for indexers to inherit from. The main entry point is the :func:`run` function which is in charge of triggering the computations on the batch dict/ids received. Indexers can: - filter out ids whose data has already been indexed. - retrieve ids data from storage or objstorage - index this data depending on the object and store the result in storage. To implement a new object type indexer, inherit from the BaseIndexer and implement indexing: - :func:`run`: + :meth:`.run`: object_ids are different depending on object. For example: sha1 for content, sha1_git for revision, directory, release, and id for origin To implement a new concrete indexer, inherit from the object level classes: :class:`ContentIndexer`, :class:`RevisionIndexer`, :class:`OriginIndexer`. Then you need to implement the following functions: - :func:`filter`: + :meth:`.filter`: filter out data already indexed (in storage). - :func:`index_object`: + :meth:`.index_object`: compute index on id with data (retrieved from the storage or the objstorage by the id key) and return the resulting index computation. - :func:`persist_index_computations`: + :meth:`.persist_index_computations`: persist the results of multiple index computations in the storage. The new indexer implementation can also override the following functions: - :func:`prepare`: + :meth:`.prepare`: Configuration preparation for the indexer. 
When overriding, this must call the `super().prepare()` instruction. - :func:`check`: + :meth:`.check`: Configuration check for the indexer. When overriding, this must call the `super().check()` instruction. - :func:`register_tools`: + :meth:`.register_tools`: This should return a dict of the tool(s) to use when indexing or filtering. """ CONFIG = 'indexer/base' DEFAULT_CONFIG = { INDEXER_CFG_KEY: ('dict', { 'cls': 'remote', 'args': { 'url': 'http://localhost:5007/' } }), 'storage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://localhost:5002/', } }), 'objstorage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://localhost:5003/', } }) } ADDITIONAL_CONFIG = {} def __init__(self): """Prepare and check that the indexer is ready to run. """ super().__init__() self.prepare() self.check() def prepare(self): """Prepare the indexer's needed runtime configuration. Without this step, the indexer cannot possibly run. """ # HACK to deal with edge case (e.g revision metadata indexer) if not hasattr(self, 'config'): self.config = self.parse_config_file( additional_configs=[self.ADDITIONAL_CONFIG]) config_storage = self.config.get('storage') if config_storage: self.storage = get_storage(**config_storage) objstorage = self.config['objstorage'] self.objstorage = get_objstorage(objstorage['cls'], objstorage['args']) idx_storage = self.config[INDEXER_CFG_KEY] self.idx_storage = get_indexer_storage(**idx_storage) _log = logging.getLogger('requests.packages.urllib3.connectionpool') _log.setLevel(logging.WARN) self.log = logging.getLogger('swh.indexer') self.tools = list(self.register_tools(self.config['tools'])) def check(self, *, check_tools=True): """Check the indexer's configuration is ok before proceeding. If ok, does nothing. If not raise error. """ if check_tools and not self.tools: raise ValueError('Tools %s is unknown, cannot continue' % self.tools) def _prepare_tool(self, tool): """Prepare the tool dict to be compliant with the storage api. """ return {'tool_%s' % key: value for key, value in tool.items()} def register_tools(self, tools): """Permit to register tools to the storage. Add a sensible default which can be overridden if not sufficient. (For now, all indexers use only one tool) Expects the self.config['tools'] property to be set with one or more tools. Args: tools (dict/[dict]): Either a dict or a list of dict. Returns: - List of dict with additional id key. + list: List of dicts with additional id key. Raises: - ValueError if not a list nor a dict. + ValueError: if not a list nor a dict. """ if isinstance(tools, list): tools = list(map(self._prepare_tool, tools)) elif isinstance(tools, dict): tools = [self._prepare_tool(tools)] else: raise ValueError('Configuration tool(s) must be a dict or list!') if tools: return self.idx_storage.indexer_configuration_add(tools) else: return [] @abc.abstractmethod def index(self, id, data): """Index computation for the id and associated raw data. Args: id (bytes): identifier data (bytes): id's data from storage or objstorage depending on - object type + object type Returns: - a dict that makes sense for the persist_index_computations - function. + dict: a dict that makes sense for the + :meth:`.persist_index_computations` method. """ pass @abc.abstractmethod def persist_index_computations(self, results, policy_update): """Persist the computation resulting from the index. Args: results ([result]): List of results. One result is the - result of the index function. + result of the index function. 
policy_update ([str]): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore - them + respectively update duplicates or ignore them Returns: None """ pass def next_step(self, results, task): """Do something else with computations results (e.g. send to another queue, ...). (This is not an abstractmethod since it is optional). Args: results ([result]): List of results (dict) as returned - by index function. + by index function. task (dict): a dict in the form expected by - `scheduler.backend.SchedulerBackend.create_tasks` - without `next_run`, plus an optional `result_name` key. + `scheduler.backend.SchedulerBackend.create_tasks` + without `next_run`, plus an optional `result_name` key. Returns: None """ if task: if getattr(self, 'scheduler', None): scheduler = self.scheduler else: scheduler = get_scheduler(**self.config['scheduler']) task = deepcopy(task) result_name = task.pop('result_name', None) task['next_run'] = datetime.datetime.now() if result_name: task['arguments']['kwargs'][result_name] = self.results scheduler.create_tasks([task]) @abc.abstractmethod def run(self, ids, policy_update, next_step=None, **kwargs): """Given a list of ids: - retrieves the data from the storage - executes the indexing computations - stores the results (according to policy_update) Args: ids ([bytes]): id's identifier list policy_update (str): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore them + respectively update duplicates or ignore them next_step (dict): a dict in the form expected by - `scheduler.backend.SchedulerBackend.create_tasks` - without `next_run`, plus a `result_name` key. + `scheduler.backend.SchedulerBackend.create_tasks` + without `next_run`, plus a `result_name` key. **kwargs: passed to the `index` method """ pass class ContentIndexer(BaseIndexer): """A content indexer working on a list of ids directly. To work on indexer range, use the :class:`ContentRangeIndexer` instead. Note: :class:`ContentIndexer` is not an instantiable object. To use it, one should inherit from this class and override the methods mentioned in the :class:`BaseIndexer` class. """ @abc.abstractmethod def filter(self, ids): """Filter missing ids for that particular indexer. Args: ids ([bytes]): list of ids Yields: iterator of missing ids """ pass def run(self, ids, policy_update, next_step=None, **kwargs): """Given a list of ids: - retrieve the content from the storage - execute the indexing computations - store the results (according to policy_update) Args: ids ([bytes]): sha1's identifier list policy_update (str): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them next_step (dict): a dict in the form expected by `scheduler.backend.SchedulerBackend.create_tasks` without `next_run`, plus an optional `result_name` key. **kwargs: passed to the `index` method """ results = [] try: for sha1 in ids: try: raw_content = self.objstorage.get(sha1) except ObjNotFoundError: self.log.warning('Content %s not found in objstorage' % hashutil.hash_to_hex(sha1)) continue res = self.index(sha1, raw_content, **kwargs) if res: # If no results, skip it results.append(res) self.persist_index_computations(results, policy_update) self.results = results return self.next_step(results, task=next_step) except Exception: self.log.exception( 'Problem when reading contents metadata.') class ContentRangeIndexer(BaseIndexer): """A content range indexer. This expects as input a range of ids to index. 
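# Illustrative sketch, not part of this patch: a minimal concrete content
# indexer wiring together the filter/index/persist_index_computations
# contract described above. The content_foo_missing and content_foo_add
# storage endpoints are hypothetical and only show the expected shape.
class FooContentIndexer(ContentIndexer):
    CONFIG_BASE_FILENAME = 'indexer/foo'

    def prepare(self):
        super().prepare()
        self.tool = self.tools[0]

    def filter(self, ids):
        yield from self.idx_storage.content_foo_missing(
            {'id': sha1, 'indexer_configuration_id': self.tool['id']}
            for sha1 in ids)

    def index(self, id, data):
        # a trivial computation standing in for a real indexer
        return {'id': id, 'length': len(data),
                'indexer_configuration_id': self.tool['id']}

    def persist_index_computations(self, results, policy_update):
        self.idx_storage.content_foo_add(
            results, conflict_update=(policy_update == 'update-dups'))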
To work on a list of ids, use the :class:`ContentIndexer` instead. Note: :class:`ContentRangeIndexer` is not an instantiable object. To use it, one should inherit from this class and override the methods mentioned in the :class:`BaseIndexer` class. """ @abc.abstractmethod def indexed_contents_in_range(self, start, end): """Retrieve indexed contents within range [start, end]. - Args - **start** (bytes): Starting bound from range identifier - **end** (bytes): End range identifier + Args: + start (bytes): Starting bound from range identifier + end (bytes): End range identifier Yields: - Content identifier (bytes) present in the range [start, end] + bytes: Content identifier present in the range ``[start, end]`` """ pass def _list_contents_to_index(self, start, end, indexed): """Compute from storage the new contents to index in the range [start, end]. The already indexed contents are skipped. Args: - **start** (bytes): Starting bound from range identifier - **end** (bytes): End range identifier - **indexed** (Set[bytes]): Set of content already indexed. + start (bytes): Starting bound from range identifier + end (bytes): End range identifier + indexed (Set[bytes]): Set of content already indexed. Yields: - Identifier (bytes) of contents to index. + bytes: Identifier of contents to index. """ while start: result = self.storage.content_get_range(start, end) contents = result['contents'] for c in contents: _id = c['sha1'] if _id in indexed: continue yield _id start = result['next'] def _index_contents(self, start, end, indexed, **kwargs): """Index the contents from within range [start, end] Args: - **start** (bytes): Starting bound from range identifier - **end** (bytes): End range identifier - **indexed** (Set[bytes]): Set of content already indexed. + start (bytes): Starting bound from range identifier + end (bytes): End range identifier + indexed (Set[bytes]): Set of content already indexed. Yields: - Data indexed (dict) to persist using the indexer storage + dict: Data indexed to persist using the indexer storage """ for sha1 in self._list_contents_to_index(start, end, indexed): try: raw_content = self.objstorage.get(sha1) except ObjNotFoundError: self.log.warning('Content %s not found in objstorage' % hashutil.hash_to_hex(sha1)) continue res = self.index(sha1, raw_content, **kwargs) if res: yield res def _index_with_skipping_already_done(self, start, end): """Index not already indexed contents in range [start, end]. Args: - **start** (Union[bytes, str]): Starting range identifier - **end** (Union[bytes, str]): Ending range identifier + start (Union[bytes, str]): Starting range identifier + end (Union[bytes, str]): Ending range identifier Yields: - Content identifier (bytes) present in the range [start, - end] which are not already indexed. + bytes: Content identifier present in the range + ``[start, end]`` which are not already indexed. """ while start: indexed_page = self.indexed_contents_in_range(start, end) contents = indexed_page['ids'] _end = contents[-1] if contents else end yield from self._index_contents( start, _end, contents) start = indexed_page['next'] def run(self, start, end, skip_existing=True, **kwargs): """Given a range of content ids, compute the indexing computations on the contents within. Either the indexer is incremental (filter out existing computed data) or not (compute everything from scratch). 
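# Illustrative usage sketch (assumption, not part of this patch): driving a
# range indexer over a sha1 interval; hex string bounds are converted to
# bytes by run() itself. MimetypeRangeIndexer is the concrete subclass
# defined in mimetype.py below.
#
#   indexer = MimetypeRangeIndexer()
#   indexed = indexer.run(start='00' * 20, end='ff' * 20, skip_existing=True)
#   # indexed is True if at least one batch of results was persisted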
Args: - **start** (Union[bytes, str]): Starting range identifier - **end** (Union[bytes, str]): Ending range identifier - **skip_existing** (bool): Skip existing indexed data - (default) or not + start (Union[bytes, str]): Starting range identifier + end (Union[bytes, str]): Ending range identifier + skip_existing (bool): Skip existing indexed data + (default) or not **kwargs: passed to the `index` method Returns: - a boolean. True if data was indexed, False otherwise. + bool: True if data was indexed, False otherwise. """ with_indexed_data = False try: if isinstance(start, str): start = hashutil.hash_to_bytes(start) if isinstance(end, str): end = hashutil.hash_to_bytes(end) if skip_existing: gen = self._index_with_skipping_already_done(start, end) else: gen = self._index_contents(start, end, indexed=[]) for results in utils.grouper(gen, n=self.config['write_batch_size']): self.persist_index_computations( results, policy_update='update-dups') with_indexed_data = True except Exception: self.log.exception( 'Problem when computing metadata.') finally: return with_indexed_data class OriginIndexer(BaseIndexer): """An object type indexer, inherits from the :class:`BaseIndexer` and implements Origin indexing using the run method Note: the :class:`OriginIndexer` is not an instantiable object. To use it in another context one should inherit from this class and override the methods mentioned in the :class:`BaseIndexer` class. """ def run(self, ids, policy_update='update-dups', parse_ids=True, next_step=None, **kwargs): """Given a list of origin ids: - retrieve origins from storage - execute the indexing computations - store the results (according to policy_update) Args: ids ([Union[int, Tuple[str, bytes]]]): list of origin ids or - (type, url) tuples. + (type, url) tuples. policy_update (str): either 'update-dups' or 'ignore-dups' to - respectively update duplicates (default) - or ignore them + respectively update duplicates (default) or ignore them next_step (dict): a dict in the form expected by - `scheduler.backend.SchedulerBackend.create_tasks` - without `next_run`, plus an optional `result_name` key. + `scheduler.backend.SchedulerBackend.create_tasks` without + `next_run`, plus an optional `result_name` key. parse_ids (bool): Do we need to parse id or not (default) **kwargs: passed to the `index` method """ if parse_ids: ids = [o.split('+', 1) if ':' in o else int(o) # type+url or id for o in ids] results = [] for id_ in ids: if isinstance(id_, (tuple, list)): if len(id_) != 2: raise TypeError('Expected a (type, url) tuple.') (type_, url) = id_ params = {'type': type_, 'url': url} elif isinstance(id_, int): params = {'id': id_} else: raise TypeError('Invalid value in "ids": %r' % id_) origin = self.storage.origin_get(params) if not origin: self.log.warning('Origins %s not found in storage' % list(ids)) continue try: res = self.index(origin, **kwargs) if origin: # If no results, skip it results.append(res) except Exception: self.log.exception( 'Problem when processing origin %s' % id_) self.persist_index_computations(results, policy_update) self.results = results return self.next_step(results, task=next_step) class RevisionIndexer(BaseIndexer): """An object type indexer, inherits from the :class:`BaseIndexer` and implements Revision indexing using the run method Note: the :class:`RevisionIndexer` is not an instantiable object. To use it in another context one should inherit from this class and override the methods mentioned in the :class:`BaseIndexer` class. 
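# Illustrative sketch, not part of this patch: the two id forms accepted by
# OriginIndexer.run above when parse_ids is True, namely numeric origin ids
# and 'type+url' strings (anything containing ':' is split on the first '+').
# The values are made up.
#
#   origin_indexer.run(['12345', 'git+https://example.org/repo.git'],
#                      policy_update='update-dups')
#   # processed as [12345, ['git', 'https://example.org/repo.git']]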
""" def run(self, ids, policy_update, next_step=None): """Given a list of sha1_gits: - retrieve revisions from storage - execute the indexing computations - store the results (according to policy_update) Args: ids ([bytes or str]): sha1_git's identifier list policy_update (str): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore - them + respectively update duplicates or ignore them """ results = [] ids = [hashutil.hash_to_bytes(id_) if isinstance(id_, str) else id_ for id_ in ids] revs = self.storage.revision_get(ids) for rev in revs: if not rev: self.log.warning('Revisions %s not found in storage' % list(map(hashutil.hash_to_hex, ids))) continue try: res = self.index(rev) if res: # If no results, skip it results.append(res) except Exception: self.log.exception( 'Problem when processing revision') self.persist_index_computations(results, policy_update) self.results = results return self.next_step(results, task=next_step) diff --git a/swh/indexer/language.py b/swh/indexer/language.py index 5ac61ec..7d0e3a4 100644 --- a/swh/indexer/language.py +++ b/swh/indexer/language.py @@ -1,209 +1,209 @@ # Copyright (C) 2016-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import io from pygments.lexers import guess_lexer from pygments.util import ClassNotFound from chardet.universaldetector import UniversalDetector from .indexer import ContentIndexer def _cleanup_classname(classname): """Determine the language from the pygments' lexer names. """ return classname.lower().replace(' ', '-') def _read_raw(raw_content, size=2048): """Read raw content in chunk. """ bs = io.BytesIO(raw_content) while True: chunk = bs.read(size) if not chunk: break yield chunk def _detect_encoding(raw_content): """Given a raw content, try and detect its encoding. """ detector = UniversalDetector() for chunk in _read_raw(raw_content): detector.feed(chunk) if detector.done: break detector.close() return detector.result['encoding'] def compute_language_from_chunk(encoding, length, raw_content, max_size, log=None): """Determine the raw content's language. Args: encoding (str): Encoding to use to decode the content length (int): raw_content's length raw_content (bytes): raw content to work with max_size (int): max size to split the raw content at Returns: - Dict with keys: - - lang: None if nothing found or the possible language + dict: Dict with keys: + - **lang**: None if nothing found or the possible language """ try: if max_size <= length: raw_content = raw_content[0:max_size] content = raw_content.decode(encoding) lang = _cleanup_classname( guess_lexer(content).name) except ClassNotFound: lang = None except UnicodeDecodeError: raise except Exception: if log: log.exception('Problem during language detection, skipping') lang = None return { 'lang': lang } def compute_language(raw_content, encoding=None, log=None): """Determine the raw content's language. 
Args: raw_content (bytes): raw content to work with Returns: - Dict with keys: - - lang: None if nothing found or the possible language + dict: Dict with keys: + - **lang**: None if nothing found or the possible language """ try: encoding = _detect_encoding(raw_content) content = raw_content.decode(encoding) lang = _cleanup_classname( guess_lexer(content).name) except ClassNotFound: lang = None except Exception: if log: log.exception('Problem during language detection, skipping') lang = None return { 'lang': lang } class ContentLanguageIndexer(ContentIndexer): """Indexer in charge of: - filtering out content already indexed - reading content from objstorage per the content's id (sha1) - computing {mimetype, encoding} from that content - store result in storage """ CONFIG_BASE_FILENAME = 'indexer/language' ADDITIONAL_CONFIG = { 'tools': ('dict', { 'name': 'pygments', 'version': '2.0.1+dfsg-1.1+deb8u1', 'configuration': { 'type': 'library', 'debian-package': 'python3-pygments', 'max_content_size': 10240, }, }), } def prepare(self): super().prepare() c = self.config self.max_content_size = c['tools']['configuration']['max_content_size'] self.tool = self.tools[0] def filter(self, ids): """Filter out known sha1s and return only missing ones. """ yield from self.idx_storage.content_language_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'] } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: - A dict, representing a content_mimetype, with keys: - - id (bytes): content's identifier (sha1) - - lang (bytes): detected language + dict: Dict that represents a content_mimetype, with keys: + - id (bytes): content's identifier (sha1) + - lang (bytes): detected language """ result = { 'id': id, 'indexer_configuration_id': self.tool['id'], 'lang': None, } encoding = _detect_encoding(data) if not encoding: return result _len = len(data) for i in range(0, 9): max_size = self.max_content_size + i try: result = compute_language_from_chunk( encoding, _len, data, max_size, log=self.log) except UnicodeDecodeError: self.log.warning( 'Decoding failed on wrong byte chunk at [0-%s]' ', trying again at next ending byte.' % max_size) continue # we found something, so we return it result.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], }) break return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
Args: results ([dict]): list of content_mimetype, dict with the - following keys: + following keys: - id (bytes): content's identifier (sha1) - lang (bytes): detected language policy_update ([str]): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore them + respectively update duplicates or ignore them """ self.idx_storage.content_language_add( results, conflict_update=(policy_update == 'update-dups')) diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py index 84827fc..4549305 100644 --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -1,337 +1,336 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import itertools import logging from swh.indexer.indexer import ContentIndexer, RevisionIndexer, OriginIndexer from swh.indexer.metadata_dictionary import MAPPINGS from swh.indexer.metadata_detector import detect_metadata from swh.indexer.metadata_detector import extract_minimal_metadata_dict from swh.indexer.storage import INDEXER_CFG_KEY from swh.model import hashutil class ContentMetadataIndexer(ContentIndexer): """Content-level indexer This indexer is in charge of: - filtering out content already indexed in content_metadata - reading content from objstorage with the content's id sha1 - computing translated_metadata by given context - using the metadata_dictionary as the 'swh-metadata-translator' tool - store result in content_metadata table """ # Note: This used when the content metadata indexer is used alone # (not the case for example in the case of the RevisionMetadataIndexer) CONFIG_BASE_FILENAME = 'indexer/content_metadata' def __init__(self, tool, config): # FIXME: Simplify this twisted way to use the exact same # config of RevisionMetadataIndexer object that uses # internally ContentMetadataIndexer self.config = config self.config['tools'] = tool self.results = [] super().__init__() self.tool = self.tools[0] # Tool is now registered (cf. prepare call) def filter(self, ids): """Filter out known sha1s and return only missing ones. """ yield from self.idx_storage.content_metadata_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: dict: dictionary representing a content_metadata. If the translation wasn't successful the translated_metadata keys will be returned as None """ result = { 'id': id, 'indexer_configuration_id': self.tool['id'], 'translated_metadata': None } try: mapping_name = self.tool['tool_configuration']['context'] result['translated_metadata'] = MAPPINGS[mapping_name] \ .translate(data) except Exception: self.log.exception( "Problem during tool retrieval of metadata translation") return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
Args: results ([dict]): list of content_metadata, dict with the - following keys: + following keys: - id (bytes): content's identifier (sha1) - translated_metadata (jsonb): detected metadata policy_update ([str]): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore them + respectively update duplicates or ignore them """ self.idx_storage.content_metadata_add( results, conflict_update=(policy_update == 'update-dups')) class RevisionMetadataIndexer(RevisionIndexer): """Revision-level indexer This indexer is in charge of: - filtering revisions already indexed in revision_metadata table with defined computation tool - retrieve all entry_files in root directory - use metadata_detector for file_names containing metadata - compute metadata translation if necessary and possible (depends on tool) - send sha1s to content indexing if possible - store the results for revision """ CONFIG_BASE_FILENAME = 'indexer/revision_metadata' ADDITIONAL_CONFIG = { 'tools': ('dict', { 'name': 'swh-metadata-detector', 'version': '0.0.2', 'configuration': { 'type': 'local', 'context': ['NpmMapping', 'CodemetaMapping'] }, }), } ContentMetadataIndexer = ContentMetadataIndexer def prepare(self): super().prepare() self.tool = self.tools[0] def filter(self, sha1_gits): """Filter out known sha1s and return only missing ones. """ yield from self.idx_storage.revision_metadata_missing(( { 'id': sha1_git, 'indexer_configuration_id': self.tool['id'], } for sha1_git in sha1_gits )) def index(self, rev): """Index rev by processing it and organizing result. use metadata_detector to iterate on filenames - if one filename detected -> sends file to content indexer - if multiple file detected -> translation needed at revision level Args: rev (bytes): revision artifact from storage Returns: dict: dictionary representing a revision_metadata, with keys: - - id (str): rev's identifier (sha1_git) - - indexer_configuration_id (bytes): tool used - - translated_metadata: dict of retrieved metadata + - id (str): rev's identifier (sha1_git) + - indexer_configuration_id (bytes): tool used + - translated_metadata: dict of retrieved metadata """ result = { 'id': rev['id'], 'indexer_configuration_id': self.tool['id'], 'translated_metadata': None } try: root_dir = rev['directory'] dir_ls = self.storage.directory_ls(root_dir, recursive=False) files = [entry for entry in dir_ls if entry['type'] == 'file'] detected_files = detect_metadata(files) result['translated_metadata'] = self.translate_revision_metadata( detected_files) except Exception as e: self.log.exception( 'Problem when indexing rev: %r', e) return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. 
Args: results ([dict]): list of content_mimetype, dict with the - following keys: + following keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes policy_update ([str]): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore them + respectively update duplicates or ignore them """ # TODO: add functions in storage to keep data in revision_metadata self.idx_storage.revision_metadata_add( results, conflict_update=(policy_update == 'update-dups')) def translate_revision_metadata(self, detected_files): """ Determine plan of action to translate metadata when containing one or multiple detected files: Args: detected_files (dict): dictionary mapping context names (e.g., "npm", "authors") to list of sha1 Returns: dict: dict with translated metadata according to the CodeMeta vocabulary """ translated_metadata = [] tool = { 'name': 'swh-metadata-translator', 'version': '0.0.2', 'configuration': { 'type': 'local', 'context': None }, } # TODO: iterate on each context, on each file # -> get raw_contents # -> translate each content config = { k: self.config[k] for k in [INDEXER_CFG_KEY, 'objstorage', 'storage'] } for context in detected_files.keys(): tool['configuration']['context'] = context c_metadata_indexer = self.ContentMetadataIndexer(tool, config) # sha1s that are in content_metadata table sha1s_in_storage = [] metadata_generator = self.idx_storage.content_metadata_get( detected_files[context]) for c in metadata_generator: # extracting translated_metadata sha1 = c['id'] sha1s_in_storage.append(sha1) local_metadata = c['translated_metadata'] # local metadata is aggregated if local_metadata: translated_metadata.append(local_metadata) sha1s_filtered = [item for item in detected_files[context] if item not in sha1s_in_storage] if sha1s_filtered: # content indexing try: c_metadata_indexer.run(sha1s_filtered, policy_update='ignore-dups') # on the fly possibility: for result in c_metadata_indexer.results: local_metadata = result['translated_metadata'] translated_metadata.append(local_metadata) except Exception: self.log.exception( "Exception while indexing metadata on contents") # transform translated_metadata into min set with swh-metadata-detector min_metadata = extract_minimal_metadata_dict(translated_metadata) return min_metadata class OriginMetadataIndexer(OriginIndexer): CONFIG_BASE_FILENAME = 'indexer/origin_intrinsic_metadata' ADDITIONAL_CONFIG = { 'tools': ('list', []) } def check(self, **kwargs): kwargs['check_tools'] = False super().check(**kwargs) def filter(self, ids): return ids def run(self, origin_head, policy_update): """Expected to be called with the result of RevisionMetadataIndexer as first argument; ie. not a list of ids as other indexers would. Args: - - * `origin_head` (dict): {str(origin_id): rev_id} + origin_head (dict): {str(origin_id): rev_id} keys `origin_id` and `revision_id`, which is the result of OriginHeadIndexer. - * `policy_update`: `'ignore-dups'` or `'update-dups'` + policy_update (str): `'ignore-dups'` or `'update-dups'` """ origin_head_map = {int(origin_id): hashutil.hash_to_bytes(rev_id) for (origin_id, rev_id) in origin_head.items()} # Fix up the argument order. revisions_metadata has to be the # first argument because of celery.chain; the next line calls # run() with the usual order, ie. origin ids first. 
return super().run(ids=list(origin_head_map), policy_update=policy_update, parse_ids=False, origin_head_map=origin_head_map) def index(self, origin, *, origin_head_map): # Get the last revision of the origin. revision_id = origin_head_map[origin['id']] revision_metadata = self.idx_storage \ .revision_metadata_get([revision_id]) results = [] for item in revision_metadata: assert item['id'] == revision_id # Get the metadata of that revision, and return it results.append({ 'origin_id': origin['id'], 'metadata': item['translated_metadata'], 'from_revision': revision_id, 'indexer_configuration_id': item['tool']['id'], }) return results def persist_index_computations(self, results, policy_update): self.idx_storage.origin_intrinsic_metadata_add( list(itertools.chain(*results)), conflict_update=(policy_update == 'update-dups')) @click.command() @click.option('--revs', '-i', help='Default sha1_git to lookup', multiple=True) def main(revs): _git_sha1s = list(map(hashutil.hash_to_bytes, revs)) rev_metadata_indexer = RevisionMetadataIndexer() rev_metadata_indexer.run(_git_sha1s, 'update-dups') if __name__ == '__main__': logging.basicConfig(level=logging.INFO) main() diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py index 629974a..fb7fc3f 100644 --- a/swh/indexer/metadata_detector.py +++ b/swh/indexer/metadata_detector.py @@ -1,60 +1,62 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.indexer.codemeta import compact, expand from swh.indexer.codemeta import make_absolute_uri from swh.indexer.metadata_dictionary import MAPPINGS def detect_metadata(files): """ Detects files potentially containing metadata + Args: - - file_entries (list): list of files + file_entries (list): list of files Returns: - - empty list if nothing was found - - dictionary {mapping_filenames[name]:f['sha1']} + dict: {mapping_filenames[name]:f['sha1']} (may be empty) """ results = {} for (mapping_name, mapping) in MAPPINGS.items(): matches = mapping.detect_metadata_files(files) if matches: results[mapping_name] = matches return results _MINIMAL_PROPERTY_SET = { "developmentStatus", "version", "operatingSystem", "description", "keywords", "issueTracker", "name", "author", "relatedLink", "url", "license", "maintainer", "email", "identifier", "codeRepository"} MINIMAL_METADATA_SET = {make_absolute_uri(prop) for prop in _MINIMAL_PROPERTY_SET} def extract_minimal_metadata_dict(metadata_list): """ Every item in the metadata_list is a dict of translated_metadata in the - CodeMeta vocabulary - we wish to extract a minimal set of terms and keep all values corresponding - to this term without duplication + CodeMeta vocabulary. + + We wish to extract a minimal set of terms and keep all values corresponding + to this term without duplication. 
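# Hedged illustration, not part of this patch: the intended effect of the
# extraction described above on made-up CodeMeta input. Terms outside
# MINIMAL_METADATA_SET are dropped and repeated values are kept only once.
#
#   extract_minimal_metadata_dict([
#       {'name': 'foo', 'datePublished': '2018-01-01'},
#       {'name': 'foo', 'license': 'https://spdx.org/licenses/MIT'},
#   ])
#   # -> roughly {'name': 'foo', 'license': 'https://spdx.org/licenses/MIT'}
#   # (exact form depends on the codemeta compact/expand helpers)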
+ Args: - - metadata_list (list): list of dicts of translated_metadata + metadata_list (list): list of dicts of translated_metadata Returns: - - minimal_dict (dict): one dict with selected values of metadata + dict: minimal_dict; dict with selected values of metadata """ minimal_dict = {} for document in metadata_list: for metadata_item in expand(document): for (term, value) in metadata_item.items(): if term in MINIMAL_METADATA_SET: if term not in minimal_dict: minimal_dict[term] = [value] elif value not in minimal_dict[term]: minimal_dict[term].append(value) return compact(minimal_dict) diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py index b8e01b9..7e71140 100644 --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -1,284 +1,284 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import re import abc import json import logging import xmltodict from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI from swh.indexer.codemeta import compact, expand MAPPINGS = {} def register_mapping(cls): MAPPINGS[cls.__name__] = cls() return cls class BaseMapping(metaclass=abc.ABCMeta): """Base class for mappings to inherit from To implement a new mapping: - inherit this class - override translate function """ def __init__(self): self.log = logging.getLogger('%s.%s' % ( self.__class__.__module__, self.__class__.__name__)) @abc.abstractmethod def detect_metadata_files(self, files): """ Detects files potentially containing metadata + Args: - - file_entries (list): list of files + file_entries (list): list of files Returns: - - empty list if nothing was found - - list of sha1 otherwise + list: list of sha1 (possibly empty) """ pass @abc.abstractmethod def translate(self, file_content): pass def normalize_translation(self, metadata): return compact(metadata) class SingleFileMapping(BaseMapping): """Base class for all mappings that use a single file as input.""" @property @abc.abstractmethod def filename(self): """The .json file to extract metadata from.""" pass def detect_metadata_files(self, file_entries): for entry in file_entries: if entry['name'] == self.filename: return [entry['sha1']] return [] class DictMapping(BaseMapping): """Base class for mappings that take as input a file that is mostly a key-value store (eg. 
a shallow JSON dict).""" @property @abc.abstractmethod def mapping(self): """A translation dict to map dict keys into a canonical name.""" pass def translate_dict(self, content_dict, *, normalize=True): """ Translates content by parsing content from a dict object and translating with the appropriate mapping Args: - content_dict (dict) + content_dict (dict): content dict to translate Returns: dict: translated metadata in json-friendly form needed for - the indexer + the indexer """ translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'} for k, v in content_dict.items(): # First, check if there is a specific translation # method for this key translation_method = getattr(self, 'translate_' + k, None) if translation_method: translation_method(translated_metadata, v) elif k in self.mapping: # if there is no method, but the key is known from the # crosswalk table # if there is a normalization method, use it on the value normalization_method = getattr(self, 'normalize_' + k, None) if normalization_method: v = normalization_method(v) # set the translation metadata with the normalized value translated_metadata[self.mapping[k]] = v if normalize: return self.normalize_translation(translated_metadata) else: return translated_metadata class JsonMapping(DictMapping, SingleFileMapping): """Base class for all mappings that use a JSON file as input.""" def translate(self, raw_content): """ Translates content by parsing content from a bytestring containing json data and translating with the appropriate mapping Args: - raw_content: bytes + raw_content (bytes): raw content to translate Returns: dict: translated metadata in json-friendly form needed for - the indexer + the indexer """ try: raw_content = raw_content.decode() except UnicodeDecodeError: self.log.warning('Error unidecoding %r', raw_content) return try: content_dict = json.loads(raw_content) except json.JSONDecodeError: self.log.warning('Error unjsoning %r' % raw_content) return return self.translate_dict(content_dict) @register_mapping class NpmMapping(JsonMapping): """ dedicated class for NPM (package.json) mapping and translation """ mapping = CROSSWALK_TABLE['NodeJS'] filename = b'package.json' _schema_shortcuts = { 'github': 'https://github.com/', 'gist': 'https://gist.github.com/', 'bitbucket': 'https://bitbucket.org/', 'gitlab': 'https://gitlab.com/', } def normalize_repository(self, d): """https://docs.npmjs.com/files/package.json#repository""" if isinstance(d, dict): return '{type}+{url}'.format(**d) elif isinstance(d, str): if '://' in d: return d elif ':' in d: (schema, rest) = d.split(':', 1) if schema in self._schema_shortcuts: return self._schema_shortcuts[schema] + rest else: return None else: return self._schema_shortcuts['github'] + d else: return None def normalize_bugs(self, d): return '{url}'.format(**d) _parse_author = re.compile(r'^ *' r'(?P<name>.*?)' r'( +<(?P<email>.*)>)?' r'( +\((?P<url>.*)\))?' 
r' *$') def normalize_author(self, d): 'https://docs.npmjs.com/files/package.json' \ '#people-fields-author-contributors' author = {'@type': SCHEMA_URI+'Person'} if isinstance(d, dict): name = d.get('name', None) email = d.get('email', None) url = d.get('url', None) elif isinstance(d, str): match = self._parse_author.match(d) name = match.group('name') email = match.group('email') url = match.group('url') else: return None if name: author[SCHEMA_URI+'name'] = name if email: author[SCHEMA_URI+'email'] = email if url: author[SCHEMA_URI+'url'] = url return author @register_mapping class CodemetaMapping(SingleFileMapping): """ dedicated class for CodeMeta (codemeta.json) mapping and translation """ filename = b'codemeta.json' def translate(self, content): return self.normalize_translation(expand(json.loads(content.decode()))) @register_mapping class MavenMapping(DictMapping, SingleFileMapping): """ dedicated class for Maven (pom.xml) mapping and translation """ filename = b'pom.xml' mapping = CROSSWALK_TABLE['Java (Maven)'] def translate(self, content): d = xmltodict.parse(content)['project'] metadata = self.translate_dict(d, normalize=False) metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d) return self.normalize_translation(metadata) _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'} def parse_repositories(self, d): """https://maven.apache.org/pom.html#Repositories""" if 'repositories' not in d: return [self.parse_repository(d, self._default_repository)] else: repositories = d['repositories'].get('repository', []) if not isinstance(repositories, list): repositories = [repositories] results = [] for repo in repositories: res = self.parse_repository(d, repo) if res: results.append(res) return results def parse_repository(self, d, repo): if repo.get('layout', 'default') != 'default': return # TODO ? url = repo['url'] if d['groupId']: url = os.path.join(url, *d['groupId'].split('.')) if d['artifactId']: url = os.path.join(url, d['artifactId']) return url def main(): raw_content = """{"name": "test_name", "unknown_term": "ut"}""" raw_content1 = b"""{"name": "test_name", "unknown_term": "ut", "prerequisites" :"packageXYZ"}""" result = MAPPINGS["NpmMapping"].translate(raw_content) result1 = MAPPINGS["MavenMapping"].translate(raw_content1) print(result) print(result1) if __name__ == "__main__": main() diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py index 1877644..b058cb0 100644 --- a/swh/indexer/mimetype.py +++ b/swh/indexer/mimetype.py @@ -1,153 +1,155 @@ # Copyright (C) 2016-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import magic from swh.model import hashutil from .indexer import ContentIndexer, ContentRangeIndexer def compute_mimetype_encoding(raw_content): """Determine mimetype and encoding from the raw content. Args: raw_content (bytes): content's raw data Returns: - A dict with mimetype and encoding key and corresponding values + dict: mimetype and encoding key and corresponding values (as bytes). """ r = magic.detect_from_content(raw_content) return { 'mimetype': r.mime_type, 'encoding': r.encoding, } class MixinMimetypeIndexer: """Mixin mimetype indexer. 
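# Hedged sketch, not part of this patch: the shape of
# compute_mimetype_encoding's result via magic.detect_from_content; the
# exact values depend on the libmagic database in use.
#
#   compute_mimetype_encoding(b'#!/bin/sh\necho hello\n')
#   # -> {'mimetype': 'text/x-shellscript', 'encoding': 'us-ascii'}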
See :class:`ContentMimetypeIndexer` and :class:`MimetypeRangeIndexer` """ ADDITIONAL_CONFIG = { 'tools': ('dict', { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" }, }), 'write_batch_size': ('int', 1000), } CONFIG_BASE_FILENAME = 'indexer/mimetype' def prepare(self): super().prepare() self.tool = self.tools[0] def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: - A dict, representing a content_mimetype, with keys: + dict: content's mimetype; dict keys being - - id (bytes): content's identifier (sha1) - - mimetype (bytes): mimetype in bytes - - encoding (bytes): encoding in bytes + - **id** (bytes): content's identifier (sha1) + - **mimetype** (bytes): mimetype in bytes + - **encoding** (bytes): encoding in bytes """ try: properties = compute_mimetype_encoding(data) properties.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], }) except TypeError: self.log.error('Detecting mimetype error for id %s' % ( hashutil.hash_to_hex(id), )) return None return properties def persist_index_computations(self, results, policy_update): """Persist the results in storage. Args: - results ([dict]): list of content_mimetype, dict with the - following keys: - - - id (bytes): content's identifier (sha1) - - mimetype (bytes): mimetype in bytes - - encoding (bytes): encoding in bytes + results ([dict]): list of content's mimetype dicts + (see :meth:`.index`) policy_update ([str]): either 'update-dups' or 'ignore-dups' to - respectively update duplicates or ignore them + respectively update duplicates or ignore them """ self.idx_storage.content_mimetype_add( results, conflict_update=(policy_update == 'update-dups')) class ContentMimetypeIndexer(MixinMimetypeIndexer, ContentIndexer): """Mimetype Indexer working on list of content identifiers. It: - - (optionally) filters out content already indexed (cf. :callable:`filter`) + + - (optionally) filters out content already indexed (cf. + :meth:`.filter`) - reads content from objstorage per the content's id (sha1) - computes {mimetype, encoding} from that content - stores result in storage FIXME: - - 1. Rename redundant ContentMimetypeIndexer to MimetypeIndexer - - 2. Do we keep it afterwards? ~> i think this can be used with the journal + + 1. Rename redundant ContentMimetypeIndexer to MimetypeIndexer + 2. Do we keep it afterwards? ~> i think this can be used with the journal """ def filter(self, ids): """Filter out known sha1s and return only missing ones. """ yield from self.idx_storage.content_mimetype_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) class MimetypeRangeIndexer(MixinMimetypeIndexer, ContentRangeIndexer): """Mimetype Range Indexer working on range of content identifiers. It: - - (optionally) filters out content already indexed (cf :callable:`range`) + + - (optionally) filters out content already indexed (cf + :meth:`.indexed_contents_in_range`) - reads content from objstorage per the content's id (sha1) - computes {mimetype, encoding} from that content - stores result in storage """ def indexed_contents_in_range(self, start, end): """Retrieve indexed content id within range [start, end]. 
- Args - **start** (bytes): Starting bound from range identifier - **end** (bytes): End range identifier + Args: + start (bytes): Starting bound from range identifier + end (bytes): End range identifier Returns: - a dict with keys: + dict: a dict with keys: + - **ids** [bytes]: iterable of content ids within the range. - **next** (Optional[bytes]): The next range of sha1 starts at - this sha1 if any + this sha1 if any """ return self.idx_storage.content_mimetype_get_range( start, end, self.tool['id']) diff --git a/swh/indexer/rehash.py b/swh/indexer/rehash.py index d2697e0..b01b326 100644 --- a/swh/indexer/rehash.py +++ b/swh/indexer/rehash.py @@ -1,172 +1,172 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import itertools from collections import defaultdict from swh.core import utils from swh.core.config import SWHConfig from swh.model import hashutil from swh.objstorage import get_objstorage from swh.objstorage.exc import ObjNotFoundError from swh.storage import get_storage class RecomputeChecksums(SWHConfig): """Class in charge of (re)computing content's hashes. Hashes to compute are defined across 2 configuration options: compute_checksums ([str]) list of hash algorithms that py:func:`swh.model.hashutil.MultiHash.from_data` function should be able to deal with. For variable-length checksums, a desired checksum length should also be provided. Their format is : e.g: blake2:512 recompute_checksums (bool) a boolean to notify that we also want to recompute potential existing hashes specified in compute_checksums. Default to False. """ DEFAULT_CONFIG = { # The storage to read from or update metadata to 'storage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://localhost:5002/' }, }), # The objstorage to read contents' data from 'objstorage': ('dict', { 'cls': 'pathslicing', 'args': { 'root': '/srv/softwareheritage/objects', 'slicing': '0:2/2:4/4:6', }, }), # the set of checksums that should be computed. # Examples: 'sha1_git', 'blake2b512', 'blake2s256' 'compute_checksums': ( 'list[str]', []), # whether checksums that already exist in the DB should be # recomputed/updated or left untouched 'recompute_checksums': ('bool', False), # Number of contents to retrieve blobs at the same time 'batch_size_retrieve_content': ('int', 10), # Number of contents to update at the same time 'batch_size_update': ('int', 100), } CONFIG_BASE_FILENAME = 'indexer/rehash' def __init__(self): self.config = self.parse_config_file() self.storage = get_storage(**self.config['storage']) self.objstorage = get_objstorage(**self.config['objstorage']) self.compute_checksums = self.config['compute_checksums'] self.recompute_checksums = self.config[ 'recompute_checksums'] self.batch_size_retrieve_content = self.config[ 'batch_size_retrieve_content'] self.batch_size_update = self.config[ 'batch_size_update'] self.log = logging.getLogger('swh.indexer.rehash') if not self.compute_checksums: raise ValueError('Checksums list should not be empty.') def _read_content_ids(self, contents): """Read the content identifiers from the contents. """ for c in contents: h = c['sha1'] if isinstance(h, str): h = hashutil.hash_to_bytes(h) yield h def get_new_contents_metadata(self, all_contents): """Retrieve raw contents and compute new checksums on the contents. Unknown or corrupted contents are skipped. 
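# Illustrative sketch, not part of this patch: the MultiHash call used
# further down to recompute checksums for one raw content; 'sha256' and
# 'blake2s256' stand in for whatever compute_checksums is configured with.
#
#   from swh.model import hashutil
#   new_hashes = hashutil.MultiHash.from_data(
#       raw_content, hash_names=['sha256', 'blake2s256']).digest()
#   # new_hashes maps each requested algorithm name to its digest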
Args: all_contents ([dict]): List of contents as dictionary with - the necessary primary keys + the necessary primary keys checksum_algorithms ([str]): List of checksums to compute Yields: - tuple of: content to update, list of checksums computed + tuple: tuple of (content to update, list of checksums computed) """ content_ids = self._read_content_ids(all_contents) for contents in utils.grouper(content_ids, self.batch_size_retrieve_content): contents_iter = itertools.tee(contents, 2) try: content_metadata = self.storage.content_get_metadata( [s for s in contents_iter[0]]) except Exception: self.log.exception( 'Problem when reading contents metadata.') continue for content in content_metadata: if self.recompute_checksums: # Recompute checksums provided # in compute_checksums options checksums_to_compute = list(self.compute_checksums) else: # Compute checksums provided in compute_checksums # options not already defined for that content checksums_to_compute = [h for h in self.compute_checksums if not content.get(h)] if not checksums_to_compute: # Nothing to recompute continue try: raw_content = self.objstorage.get(content['sha1']) except ObjNotFoundError: self.log.warning('Content %s not found in objstorage!' % content['sha1']) continue content_hashes = hashutil.MultiHash.from_data( raw_content, hash_names=checksums_to_compute).digest() content.update(content_hashes) yield content, checksums_to_compute def run(self, contents): """Given a list of content: - (re)compute a given set of checksums on contents available in our object storage - update those contents with the new metadata Args: contents (dict): contents as dictionary with necessary keys. key present in such dictionary should be the ones defined in the 'primary_key' option. """ for data in utils.grouper( self.get_new_contents_metadata(contents), self.batch_size_update): groups = defaultdict(list) for content, keys_to_update in data: keys = ','.join(keys_to_update) groups[keys].append(content) for keys_to_update, contents in groups.items(): keys = keys_to_update.split(',') try: self.storage.content_update(contents, keys=keys) except Exception: self.log.exception('Problem during update.') continue diff --git a/swh/indexer/storage/converters.py b/swh/indexer/storage/converters.py index 65859fc..177dd53 100644 --- a/swh/indexer/storage/converters.py +++ b/swh/indexer/storage/converters.py @@ -1,138 +1,140 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information def ctags_to_db(ctags): """Convert a ctags entry into a ready ctags entry. 
Args: ctags (dict): ctags entry with the following keys: - id (bytes): content's identifier - tool_id (int): tool id used to compute ctags - ctags ([dict]): List of dictionary with the following keys: - name (str): symbol's name - kind (str): symbol's kind - line (int): symbol's line in the content - language (str): language Returns: list: list of ctags entries as dicts with the following keys: - - id (bytes): content's identifier - - name (str): symbol's name - - kind (str): symbol's kind - - language (str): language for that content - - tool_id (int): tool id used to compute ctags + - id (bytes): content's identifier + - name (str): symbol's name + - kind (str): symbol's kind + - line (int): symbol's line in the content + - lang (str): language for that content + - indexer_configuration_id (int): tool id used to compute ctags """ id = ctags['id'] tool_id = ctags['indexer_configuration_id'] for ctag in ctags['ctags']: yield { 'id': id, 'name': ctag['name'], 'kind': ctag['kind'], 'line': ctag['line'], 'lang': ctag['lang'], 'indexer_configuration_id': tool_id, } def db_to_ctags(ctag): """Convert a ctags entry into a ready ctags entry. Args: ctags (dict): ctags entry with the following keys: - - id (bytes): content's identifier - - ctags ([dict]): List of dictionary with the following keys: - - name (str): symbol's name - - kind (str): symbol's kind - - line (int): symbol's line in the content - - language (str): language + + - id (bytes): content's identifier + - name (str): symbol's name + - kind (str): symbol's kind + - line (int): symbol's line in the content + - lang (str): language + - tool_id (int): tool id used to compute the ctags + - tool_name (str): tool's name + - tool_version (str): tool's version + - tool_configuration (dict): tool's configuration Returns: - List of ctags ready entry (dict with the following keys): + dict: a ready ctags entry with the following keys: + - id (bytes): content's identifier - name (str): symbol's name - kind (str): symbol's kind - language (str): language for that content - tool (dict): tool used to compute the ctags """ return { 'id': ctag['id'], 'name': ctag['name'], 'kind': ctag['kind'], 'line': ctag['line'], 'lang': ctag['lang'], 'tool': { 'id': ctag['tool_id'], 'name': ctag['tool_name'], 'version': ctag['tool_version'], 'configuration': ctag['tool_configuration'] } } def db_to_mimetype(mimetype): """Convert a ctags entry into a ready ctags output. """ return { 'id': mimetype['id'], 'encoding': mimetype['encoding'], 'mimetype': mimetype['mimetype'], 'tool': { 'id': mimetype['tool_id'], 'name': mimetype['tool_name'], 'version': mimetype['tool_version'], 'configuration': mimetype['tool_configuration'] } } def db_to_language(language): """Convert a language entry into a ready language output. """ return { 'id': language['id'], 'lang': language['lang'], 'tool': { 'id': language['tool_id'], 'name': language['tool_name'], 'version': language['tool_version'], 'configuration': language['tool_configuration'] } } def db_to_metadata(metadata): """Convert a metadata entry into a ready metadata output.
""" metadata['tool'] = { 'id': metadata['tool_id'], 'name': metadata['tool_name'], 'version': metadata['tool_version'], 'configuration': metadata['tool_configuration'] } del metadata['tool_id'], metadata['tool_configuration'] del metadata['tool_version'], metadata['tool_name'] return metadata def db_to_fossology_license(license): return { 'licenses': license['licenses'], 'tool': { 'id': license['tool_id'], 'name': license['tool_name'], 'version': license['tool_version'], 'configuration': license['tool_configuration'], } } diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py index 0fea30c..68ee6c9 100644 --- a/swh/indexer/storage/db.py +++ b/swh/indexer/storage/db.py @@ -1,396 +1,397 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.model import hashutil from swh.storage.db import BaseDb, stored_procedure, cursor_to_bytes from swh.storage.db import line_to_bytes, execute_values_to_bytes class Db(BaseDb): """Proxy to the SWH Indexer DB, with wrappers around stored procedures """ content_mimetype_hash_keys = ['id', 'indexer_configuration_id'] def _missing_from_list(self, table, data, hash_keys, cur=None): """Read from table the data with hash_keys that are missing. Args: table (str): Table name (e.g content_mimetype, content_language, - etc...) + etc...) data (dict): Dict of data to read from hash_keys ([str]): List of keys to read in the data dict. Yields: The data which is missing from the db. """ cur = self._cursor(cur) keys = ', '.join(hash_keys) equality = ' AND '.join( ('t.%s = c.%s' % (key, key)) for key in hash_keys ) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(%s) where not exists ( select 1 from %s c where %s ) """ % (keys, keys, table, equality), (tuple(m[k] for k in hash_keys) for m in data) ) def content_mimetype_missing_from_list(self, mimetypes, cur=None): """List missing mimetypes. """ yield from self._missing_from_list( 'content_mimetype', mimetypes, self.content_mimetype_hash_keys, cur=cur) content_mimetype_cols = [ 'id', 'mimetype', 'encoding', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_content_mimetype') def mktemp_content_mimetype(self, cur=None): pass def content_mimetype_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_content_mimetype_add(%s)", (conflict_update, )) def _convert_key(self, key, main_table='c'): """Convert keys according to specific use in the module. + Args: key (str): Key expression to change according to the alias - used in the query + used in the query main_table (str): Alias to use for the main table. Default - to c for content_{something}. + to c for content_{something}. Expected: Tables content_{something} being aliased as 'c' (something in {language, mimetype, ...}), table indexer_configuration being aliased as 'i'. """ if key == 'id': return '%s.id' % main_table elif key == 'tool_id': return 'i.id as tool_id' elif key == 'licenses': return ''' array(select name from fossology_license where id = ANY( array_agg(%s.license_id))) as licenses''' % main_table return key def _get_from_list(self, table, ids, cols, cur=None, id_col='id'): """Fetches entries from the `table` such that their `id` field (or whatever is given to `id_col`) is in `ids`. Returns the columns `cols`. 
The `cur`sor is used to connect to the database. """ cur = self._cursor(cur) keys = map(self._convert_key, cols) query = """ select {keys} from (values %s) as t(id) inner join {table} c on c.{id_col}=t.id inner join indexer_configuration i on c.indexer_configuration_id=i.id; """.format( keys=', '.join(keys), id_col=id_col, table=table) yield from execute_values_to_bytes( cur, query, ((_id,) for _id in ids) ) content_indexer_names = { 'mimetype': 'content_mimetype', 'fossology_license': 'content_fossology_license', } def content_get_range(self, content_type, start, end, indexer_configuration_id, limit=1000, with_textual_data=False, cur=None): """Retrieve contents with content_type, within range [start, end] bound by limit and associated to the given indexer configuration id. When asking to work on textual content, that filters on the mimetype table with any mimetype that is not binary. """ cur = self._cursor(cur) table = self.content_indexer_names[content_type] if with_textual_data: extra = """inner join content_mimetype cm on (t.id=cm.id and cm.mimetype like 'text/%%')""" else: extra = "" query = """select t.id from %s t inner join indexer_configuration ic on t.indexer_configuration_id=ic.id %s where ic.id=%%s and %%s <= t.id and t.id <= %%s order by t.indexer_configuration_id, t.id limit %%s""" % (table, extra) cur.execute(query, (indexer_configuration_id, start, end, limit)) yield from cursor_to_bytes(cur) def content_mimetype_get_from_list(self, ids, cur=None): yield from self._get_from_list( 'content_mimetype', ids, self.content_mimetype_cols, cur=cur) content_language_hash_keys = ['id', 'indexer_configuration_id'] def content_language_missing_from_list(self, languages, cur=None): """List missing languages. """ yield from self._missing_from_list( 'content_language', languages, self.content_language_hash_keys, cur=cur) content_language_cols = [ 'id', 'lang', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_content_language') def mktemp_content_language(self, cur=None): pass def content_language_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_content_language_add(%s)", (conflict_update, )) def content_language_get_from_list(self, ids, cur=None): yield from self._get_from_list( 'content_language', ids, self.content_language_cols, cur=cur) content_ctags_hash_keys = ['id', 'indexer_configuration_id'] def content_ctags_missing_from_list(self, ctags, cur=None): """List missing ctags. 
""" yield from self._missing_from_list( 'content_ctags', ctags, self.content_ctags_hash_keys, cur=cur) content_ctags_cols = [ 'id', 'name', 'kind', 'line', 'lang', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_content_ctags') def mktemp_content_ctags(self, cur=None): pass def content_ctags_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_content_ctags_add(%s)", (conflict_update, )) def content_ctags_get_from_list(self, ids, cur=None): cur = self._cursor(cur) keys = map(self._convert_key, self.content_ctags_cols) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(id) inner join content_ctags c on c.id=t.id inner join indexer_configuration i on c.indexer_configuration_id=i.id order by line """ % ', '.join(keys), ((_id,) for _id in ids) ) def content_ctags_search(self, expression, last_sha1, limit, cur=None): cur = self._cursor(cur) if not last_sha1: query = """SELECT %s FROM swh_content_ctags_search(%%s, %%s)""" % ( ','.join(self.content_ctags_cols)) cur.execute(query, (expression, limit)) else: if last_sha1 and isinstance(last_sha1, bytes): last_sha1 = '\\x%s' % hashutil.hash_to_hex(last_sha1) elif last_sha1: last_sha1 = '\\x%s' % last_sha1 query = """SELECT %s FROM swh_content_ctags_search(%%s, %%s, %%s)""" % ( ','.join(self.content_ctags_cols)) cur.execute(query, (expression, limit, last_sha1)) yield from cursor_to_bytes(cur) content_fossology_license_cols = [ 'id', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration', 'licenses'] @stored_procedure('swh_mktemp_content_fossology_license') def mktemp_content_fossology_license(self, cur=None): pass def content_fossology_license_add_from_temp(self, conflict_update, cur=None): """Add new licenses per content. """ self._cursor(cur).execute( "SELECT swh_content_fossology_license_add(%s)", (conflict_update, )) def content_fossology_license_get_from_list(self, ids, cur=None): """Retrieve licenses per id. """ cur = self._cursor(cur) keys = map(self._convert_key, self.content_fossology_license_cols) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(id) inner join content_fossology_license c on t.id=c.id inner join indexer_configuration i on i.id=c.indexer_configuration_id group by c.id, i.id, i.tool_name, i.tool_version, i.tool_configuration; """ % ', '.join(keys), ((_id,) for _id in ids) ) content_metadata_hash_keys = ['id', 'indexer_configuration_id'] def content_metadata_missing_from_list(self, metadata, cur=None): """List missing metadata. """ yield from self._missing_from_list( 'content_metadata', metadata, self.content_metadata_hash_keys, cur=cur) content_metadata_cols = [ 'id', 'translated_metadata', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_content_metadata') def mktemp_content_metadata(self, cur=None): pass def content_metadata_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_content_metadata_add(%s)", (conflict_update, )) def content_metadata_get_from_list(self, ids, cur=None): yield from self._get_from_list( 'content_metadata', ids, self.content_metadata_cols, cur=cur) revision_metadata_hash_keys = ['id', 'indexer_configuration_id'] def revision_metadata_missing_from_list(self, metadata, cur=None): """List missing metadata. 
""" yield from self._missing_from_list( 'revision_metadata', metadata, self.revision_metadata_hash_keys, cur=cur) revision_metadata_cols = [ 'id', 'translated_metadata', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_revision_metadata') def mktemp_revision_metadata(self, cur=None): pass def revision_metadata_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)", (conflict_update, )) def revision_metadata_get_from_list(self, ids, cur=None): yield from self._get_from_list( 'revision_metadata', ids, self.revision_metadata_cols, cur=cur) origin_intrinsic_metadata_cols = [ 'origin_id', 'metadata', 'from_revision', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] origin_intrinsic_metadata_regconfig = 'pg_catalog.simple' """The dictionary used to normalize 'metadata' and queries. 'pg_catalog.simple' provides no stopword, so it should be suitable for proper names and non-English content. When updating this value, make sure to add a new index on origin_intrinsic_metadata.metadata.""" @stored_procedure('swh_mktemp_origin_intrinsic_metadata') def mktemp_origin_intrinsic_metadata(self, cur=None): pass def origin_intrinsic_metadata_add_from_temp( self, conflict_update, cur=None): cur = self._cursor(cur) cur.execute( "SELECT swh_origin_intrinsic_metadata_add(%s)", (conflict_update, )) def origin_intrinsic_metadata_get_from_list(self, orig_ids, cur=None): yield from self._get_from_list( 'origin_intrinsic_metadata', orig_ids, self.origin_intrinsic_metadata_cols, cur=cur, id_col='origin_id') def origin_intrinsic_metadata_search_fulltext(self, terms, *, limit, cur=None): regconfig = self.origin_intrinsic_metadata_regconfig tsquery_template = ' && '.join("plainto_tsquery('%s', %%s)" % regconfig for _ in terms) tsquery_args = [(term,) for term in terms] keys = map(self._convert_key, self.origin_intrinsic_metadata_cols) query = ("SELECT {keys} FROM origin_intrinsic_metadata AS oim " "INNER JOIN indexer_configuration AS i " "ON oim.indexer_configuration_id=i.id " "JOIN LATERAL (SELECT {tsquery_template}) AS s(tsq) ON true " "WHERE to_tsvector('{regconfig}', metadata) @@ tsq " "ORDER BY ts_rank(oim.metadata_tsvector, tsq, 1) DESC " "LIMIT %s;" ).format(keys=', '.join(keys), regconfig=regconfig, tsquery_template=tsquery_template) cur.execute(query, tsquery_args + [limit]) yield from cursor_to_bytes(cur) indexer_configuration_cols = ['id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_indexer_configuration') def mktemp_indexer_configuration(self, cur=None): pass def indexer_configuration_add_from_temp(self, cur=None): cur = self._cursor(cur) cur.execute("SELECT %s from swh_indexer_configuration_add()" % ( ','.join(self.indexer_configuration_cols), )) yield from cursor_to_bytes(cur) def indexer_configuration_get(self, tool_name, tool_version, tool_configuration, cur=None): cur = self._cursor(cur) cur.execute('''select %s from indexer_configuration where tool_name=%%s and tool_version=%%s and tool_configuration=%%s''' % ( ','.join(self.indexer_configuration_cols)), (tool_name, tool_version, tool_configuration)) data = cur.fetchone() if not data: return None return line_to_bytes(data) diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py index 1398330..715ecb9 100644 --- a/swh/indexer/storage/in_memory.py +++ b/swh/indexer/storage/in_memory.py @@ -1,249 +1,249 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at 
the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict import json class MetadataStorage: """Implements missing/get/add logic for both content_metadata and revision_metadata.""" def __init__(self, tools): self._tools = tools self._metadata = {} # map (id_, tool_id) -> metadata_dict self._tools_per_id = defaultdict(set) # map id_ -> Set[tool_id] def _transform_tool(self, tool): return { 'id': tool['id'], 'name': tool['tool_name'], 'version': tool['tool_version'], 'configuration': tool['tool_configuration'], } def missing(self, ids): """List metadata missing from storage. Args: metadata (iterable): dictionaries with keys: - **id** (bytes): sha1 identifier - **indexer_configuration_id** (int): tool used to compute the results Yields: missing sha1s """ for id_ in ids: tool_id = id_['indexer_configuration_id'] id_ = id_['id'] if tool_id not in self._tools_per_id.get(id_, set()): yield id_ def get(self, ids): """Retrieve metadata per id. Args: ids (iterable): sha1 checksums Yields: - dictionaries with the following keys: + dict: dictionaries with the following keys: - id (bytes) - translated_metadata (str): associated metadata - tool (dict): tool used to compute metadata + - **id** (bytes) + - **translated_metadata** (str): associated metadata + - **tool** (dict): tool used to compute metadata """ for id_ in ids: for tool_id in self._tools_per_id.get(id_, set()): key = (id_, tool_id) yield { 'id': id_, 'tool': self._transform_tool(self._tools[tool_id]), 'translated_metadata': self._metadata[key], } def add(self, metadata, conflict_update): """Add metadata not present in storage. Args: metadata (iterable): dictionaries with keys: - - **id**: sha1 - - **translated_metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute the - results + - **id**: sha1 + - **translated_metadata**: arbitrary dict + - **indexer_configuration_id**: tool used to compute the + results - conflict_update: Flag to determine if we want to overwrite (true) - or skip duplicates (false) + conflict_update (bool): Flag to determine if we want to overwrite + (true) or skip duplicates (false) """ for item in metadata: tool_id = item['indexer_configuration_id'] data = item['translated_metadata'] id_ = item['id'] if not conflict_update and \ tool_id in self._tools_per_id.get(id_, set()): # Duplicate, should not be updated continue key = (id_, tool_id) self._metadata[key] = data self._tools_per_id[id_].add(tool_id) class IndexerStorage: """In-memory SWH indexer storage.""" def __init__(self): self._tools = {} self._content_metadata = MetadataStorage(self._tools) self._revision_metadata = MetadataStorage(self._tools) def content_metadata_missing(self, metadata): """List metadata missing from storage. Args: metadata (iterable): dictionaries with keys: - - **id** (bytes): sha1 identifier - - **indexer_configuration_id** (int): tool used to compute - the results + - **id** (bytes): sha1 identifier + - **indexer_configuration_id** (int): tool used to compute + the results Yields: missing sha1s """ yield from self._content_metadata.missing(metadata) def content_metadata_get(self, ids): """Retrieve metadata per id. 
Args: ids (iterable): sha1 checksums Yields: dictionaries with the following keys: - id (bytes) - translated_metadata (str): associated metadata - tool (dict): tool used to compute metadata + - **id** (bytes) + - **translated_metadata** (str): associated metadata + - **tool** (dict): tool used to compute metadata """ yield from self._content_metadata.get(ids) def content_metadata_add(self, metadata, conflict_update=False): """Add metadata not present in storage. Args: metadata (iterable): dictionaries with keys: - - **id**: sha1 - - **translated_metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute the - results + - **id**: sha1 + - **translated_metadata**: arbitrary dict + - **indexer_configuration_id**: tool used to compute the + results conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ self._content_metadata.add(metadata, conflict_update) def revision_metadata_missing(self, metadata): """List metadata missing from storage. Args: metadata (iterable): dictionaries with keys: - - **id** (bytes): sha1_git revision identifier - - **indexer_configuration_id** (int): tool used to compute - the results + - **id** (bytes): sha1_git revision identifier + - **indexer_configuration_id** (int): tool used to compute + the results Yields: missing ids """ yield from self._revision_metadata.missing(metadata) def revision_metadata_get(self, ids): """Retrieve revision metadata per id. Args: ids (iterable): sha1 checksums Yields: dictionaries with the following keys: - - **id** (bytes) - - **translated_metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata + - **id** (bytes) + - **translated_metadata** (str): associated metadata + - **tool** (dict): tool used to compute metadata """ yield from self._revision_metadata.get(ids) def revision_metadata_add(self, metadata, conflict_update=False): """Add metadata not present in storage. Args: metadata (iterable): dictionaries with keys: - - **id**: sha1_git of revision - - **translated_metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute metadata + - **id**: sha1_git of revision + - **translated_metadata**: arbitrary dict + - **indexer_configuration_id**: tool used to compute metadata conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ self._revision_metadata.add(metadata, conflict_update) def indexer_configuration_add(self, tools): """Add new tools to the storage. Args: tools ([dict]): List of dictionary representing tool to - insert in the db. Dictionary with the following keys: + insert in the db. Dictionary with the following keys: - - **tool_name** (str): tool's name - - **tool_version** (str): tool's version - - **tool_configuration** (dict): tool's configuration - (free form dict) + - **tool_name** (str): tool's name + - **tool_version** (str): tool's version + - **tool_configuration** (dict): tool's configuration + (free form dict) Returns: - List of dict inserted in the db (holding the id key as - well). The order of the list is not guaranteed to match + list: List of dict inserted in the db (holding the id key as + well). The order of the list is not guaranteed to match the order of the initial list. """ inserted = [] for tool in tools: tool = tool.copy() id_ = self._tool_key(tool) tool['id'] = id_ self._tools[id_] = tool inserted.append(tool) return inserted def indexer_configuration_get(self, tool): """Retrieve tool information. 
Args: tool (dict): Dictionary representing a tool with the - following keys: + following keys: - - **tool_name** (str): tool's name - - **tool_version** (str): tool's version - - **tool_configuration** (dict): tool's configuration - (free form dict) + - **tool_name** (str): tool's name + - **tool_version** (str): tool's version + - **tool_configuration** (dict): tool's configuration + (free form dict) Returns: The same dictionary with an `id` key, None otherwise. """ return self._tools.get(self._tool_key(tool)) def _tool_key(self, tool): return (tool['tool_name'], tool['tool_version'], json.dumps(tool['tool_configuration'], sort_keys=True))
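
Note (not part of the patch): the following is an illustrative sketch of how the in-memory IndexerStorage from swh/indexer/storage/in_memory.py might be exercised, for instance in a unit test, using only the methods whose signatures appear in the diff above. The tool name, sha1 value and metadata payload are placeholder values chosen for the example, and the import path is assumed from the file location shown in the diff header.

from swh.indexer.storage.in_memory import IndexerStorage

storage = IndexerStorage()

# Register a tool; returned entries carry a generated 'id' key.
tool = storage.indexer_configuration_add([{
    'tool_name': 'example-metadata-tool',  # placeholder name
    'tool_version': '0.0.1',               # placeholder version
    'tool_configuration': {},
}])[0]

sha1 = b'\x01' * 20  # made-up content identifier

# Before anything is added, the id is reported as missing for that tool.
missing = list(storage.content_metadata_missing([
    {'id': sha1, 'indexer_configuration_id': tool['id']},
]))
assert missing == [sha1]

# Add one metadata entry, then read it back.
storage.content_metadata_add([{
    'id': sha1,
    'translated_metadata': {'name': 'example'},
    'indexer_configuration_id': tool['id'],
}])
result = list(storage.content_metadata_get([sha1]))[0]
assert result['translated_metadata'] == {'name': 'example'}
assert result['tool']['name'] == 'example-metadata-tool'

The same add/get/missing pattern applies to the revision_metadata_* methods, since both families are backed by the shared MetadataStorage helper.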