diff --git a/README b/README
index b8c009c..5c16501 100644
--- a/README
+++ b/README
@@ -1,79 +1,82 @@
swh-indexer
===========

Tools to compute multiple indexes on SWH's raw contents:
-- mimetype
-- ctags
-- language
-- fossology-license
-- metadata
+- content:
+  - mimetype
+  - ctags
+  - language
+  - fossology-license
+  - metadata
+- revision:
+  - metadata

# Context

SWH currently stores around 3B contents. The table `content` holds
their checksums.

Those contents are physically stored in an object storage (using disks)
and replicated in another. Those object storages are not yet meant to
be read from.

We are in the process of copying those contents over to azure's blob
storages. We will use that opportunity to trigger the computations on
those contents once they have been copied over.

# Workers

There are 2 kinds:
- orchestrators (orchestrator, orchestrator-text)
- indexers (mimetype, language, ctags, fossology-license)

## Orchestrator

Orchestrators:
- receive batches of sha1s
- split those batches
- broadcast those to indexers

There are 2 sorts:
- orchestrator (swh_indexer_orchestrator_content_all): receives and
  broadcasts sha1 ids (of contents) to indexers (currently only the
  mimetype indexer)
- orchestrator-text (swh_indexer_orchestrator_content_text): receives
  batches of sha1 ids (of textual contents) and broadcasts those to
  indexers (currently the language, ctags, and fossology-license
  indexers)

## Indexers

Indexers:
- receive batches of ids
- retrieve the associated data depending on the object type
- compute an index for that object
- store the result in swh's storage
- (and possibly do some broadcasting themselves)

Current content indexers:

- mimetype (queue swh_indexer_content_mimetype): compute the mimetype,
  filter out the textual contents and broadcast the list to the
  orchestrator-text

- language (queue swh_indexer_content_language): detect the
  programming language

- ctags (queue swh_indexer_content_ctags): try to compute tags
  information

- fossology-license (queue swh_indexer_fossology_license): try to
  compute the license

- metadata: translate a file into a translated_metadata dict

Current revision indexers:

- metadata: detect files containing metadata, then retrieve the
  translated_metadata from the content_metadata table in storage, or
  run the content indexer to translate the files
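
The orchestrator behaviour described above (receive a batch of sha1s,
split it, broadcast the pieces) can be summarized with a small sketch;
the batch size and the send_task callable are hypothetical stand-ins
for the real queue machinery, not part of swh-indexer:

    def split_batch(sha1s, batch_size=10):
        """Split a list of sha1s into smaller batches."""
        for i in range(0, len(sha1s), batch_size):
            yield sha1s[i:i + batch_size]

    def broadcast(sha1s, indexer_task_names, send_task):
        """Send each sub-batch to every registered indexer task."""
        for batch in split_batch(sha1s):
            for task_name in indexer_task_names:
                send_task(task_name, batch)
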
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
index b18f954..78e58e0 100644
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -1,385 +1,384 @@
# Copyright (C) 2016-2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import abc
import os
import logging
import shutil
import tempfile

from swh.core.config import SWHConfig
from swh.objstorage import get_objstorage
from swh.objstorage.exc import ObjNotFoundError
from swh.model import hashutil
from swh.storage import get_storage
from swh.scheduler.utils import get_task


class DiskIndexer:
    """Mixin intended to be used with other *Indexer classes.

    Indexers inheriting from this class are a category of indexers
    which need the disk for their computations.

    Expects:
        self.working_directory variable defined at runtime.

    """
    def __init__(self):
        super().__init__()

    def write_to_temp(self, filename, data):
        """Write the sha1's content in a temporary file.

        Args:
            filename (str): one of sha1's many filenames
            data (bytes): the sha1's content to write in a temporary
            file

        Returns:
            The path to the temporary file created. That file is
            filled in with the raw content's data.

        """
        os.makedirs(self.working_directory, exist_ok=True)
        temp_dir = tempfile.mkdtemp(dir=self.working_directory)
        content_path = os.path.join(temp_dir, filename)

        with open(content_path, 'wb') as f:
            f.write(data)

        return content_path

    def cleanup(self, content_path):
        """Remove content_path from the working directory.

        Args:
            content_path (str): the file to remove

        """
        temp_dir = os.path.dirname(content_path)
        shutil.rmtree(temp_dir)


class BaseIndexer(SWHConfig, metaclass=abc.ABCMeta):
    """Base class for indexers to inherit from.

    The main entry point is the `run` function, which is in charge of
    triggering the computations on the batch of ids received.

    Indexers can:
    - filter out ids whose data has already been indexed.
    - retrieve ids' data from storage or objstorage
    - index this data depending on the object and store the result in
      storage.

    To implement a new object type indexer, inherit from the
    BaseIndexer and implement the process of indexation:

    - def run(self, object_ids, policy_update): object_ids are
      different depending on the object. For example: sha1 for
      content, sha1_git for
-      revision, directorie, release, and id for origin
+      revision, directory, release, and id for origin

    To implement a new concrete indexer, inherit from the object level
    classes: ContentIndexer, RevisionIndexer (later on OriginIndexer
    will also be available).

    Then you need to implement the following functions:

    - def filter(self, ids): filter out data already indexed
      (in storage). This function is used by the orchestrator and not
      directly by the indexer
      (cf. swh.indexer.orchestrator.BaseOrchestratorIndexer).

    - def index_object(self, id, data): compute index on id with data
      (retrieved from the storage or the objstorage by the id key) and
      return the resulting index computation.

    - def persist_index_computations(self, results, policy_update):
      persist the results of multiple index computations in the
      storage.

    The new indexer implementation can also override the following
    functions:

    - def prepare(self): Configuration preparation for the indexer.
      When overriding, this must call the super().prepare() function.

    - def check(self): Configuration check for the indexer. When
      overriding, this must call the super().check() function.

    - def retrieve_tools_information(self): This should return a dict
      of the tool(s) to use when indexing or filtering.

    """
    CONFIG = 'indexer/base'

    DEFAULT_CONFIG = {
        'storage': ('dict', {
            'host': 'uffizi',
            'cls': 'remote',
            'args': {'root': '/tmp/softwareheritage/objects',
                     'slicing': '0:2/2:4/4:6'}
        }),
        # queue to reschedule if problem (none for no rescheduling,
        # the default)
        'rescheduling_task': ('str', None),
        'objstorage': ('dict', {
            'cls': 'multiplexer',
            'args': {
                'objstorages': [{
                    'cls': 'filtered',
                    'args': {
                        'storage_conf': {
                            'cls': 'azure-storage',
                            'args': {
                                'account_name': '0euwestswh',
                                'api_secret_key': 'secret',
                                'container_name': 'contents'
                            }
                        },
                        'filters_conf': [
                            {'type': 'readonly'},
                            {'type': 'prefix', 'prefix': '0'}
                        ]
                    }
                }, {
                    'cls': 'filtered',
                    'args': {
                        'storage_conf': {
                            'cls': 'azure-storage',
                            'args': {
                                'account_name': '1euwestswh',
                                'api_secret_key': 'secret',
                                'container_name': 'contents'
                            }
                        },
                        'filters_conf': [
                            {'type': 'readonly'},
                            {'type': 'prefix', 'prefix': '1'}
                        ]
                    }
                }]
            },
        }),
    }

    ADDITIONAL_CONFIG = {}

    def __init__(self):
        """Prepare and check that the indexer is ready to run.

        """
        super().__init__()
        self.prepare()
        self.check()

    def prepare(self):
        """Prepare the indexer's needed runtime configuration.
        Without this step, the indexer cannot possibly run.

        """
        self.config = self.parse_config_file(
            additional_configs=[self.ADDITIONAL_CONFIG])

        objstorage = self.config['objstorage']
        self.objstorage = get_objstorage(objstorage['cls'],
                                         objstorage['args'])

        storage = self.config['storage']
        self.storage = get_storage(storage['cls'], storage['args'])

        rescheduling_task = self.config['rescheduling_task']
        if rescheduling_task:
            self.rescheduling_task = get_task(rescheduling_task)
        else:
            self.rescheduling_task = None

        l = logging.getLogger('requests.packages.urllib3.connectionpool')
        l.setLevel(logging.WARN)

        self.log = logging.getLogger('swh.indexer')
        self.tools = self.retrieve_tools_information()

    def check(self):
        """Check that the indexer's configuration is ok before
        proceeding. If ok, does nothing. If not, raises an error.

        """
        if not self.tools:
            raise ValueError('Tools %s is unknown, cannot continue' %
                             self.config['tools'])

    def retrieve_tools_information(self):
        """Define how to retrieve tool information based on the
        configuration.

        Provides a sensible default which can be overridden if not
        sufficient. (For now, all indexers use only one tool)

        """
        tool = {
            'tool_%s' % key: value
            for key, value in self.config['tools'].items()
        }
        return self.storage.indexer_configuration_get(tool)

    @abc.abstractmethod
    def filter(self, ids):
        """Filter missing ids for that particular indexer.

        Args:
            ids ([bytes]): list of ids

        Yields:
            iterator of missing ids

        """
        pass

    @abc.abstractmethod
    def index(self, id, data):
        """Index computation for the id and associated raw data.

        Args:
            id (bytes): sha1 identifier
            data (bytes): id's data from storage or objstorage
            depending on object type

        Returns:
            a dict that makes sense for the
            persist_index_computations function.

        """
        pass

    @abc.abstractmethod
    def persist_index_computations(self, results, policy_update):
        """Persist the computation resulting from the index.

        Args:
            results ([result]): List of results. One result is the
            result of the index function.
            policy_update ([str]): either 'update-dups' or
            'ignore-dups' to respectively update duplicates or ignore
            them

        Returns:
            None

        """
        pass

    def next_step(self, results):
        """Do something else with the computations' results (e.g. send
        to another queue, ...).
        (This is not an abstractmethod since it is optional).

        Args:
            results ([result]): List of results (dict) as returned by
            the index function.

        Returns:
            None

        """
        pass

    @abc.abstractmethod
    def run(self, ids, policy_update):
        """Given a list of ids:
        - retrieves the data from the storage
        - executes the indexing computations
        - stores the results (according to policy_update)

        Args:
            ids ([bytes]): id's identifier list
            policy_update ([str]): either 'update-dups' or
            'ignore-dups' to respectively update duplicates or ignore
            them

        """
        pass
class ContentIndexer(BaseIndexer):
    """
-    An object type indexer, inherit from the BaseIndexer and
-    implement the process of indexation for Contents with the run method
+    An object type indexer, inherits from the BaseIndexer and
+    implements the process of indexation for Contents using the run
+    method

    Note: the ContentIndexer is not an instantiable object;
-    to use it in another context one should refer to the instructions in the
-    BaseIndexer
+    to use it in another context one should inherit from this class and
+    override the methods mentioned in the BaseIndexer class

    """
    def run(self, sha1s, policy_update):
        """Given a list of sha1s:
        - retrieve the content from the storage
        - execute the indexing computations
        - store the results (according to policy_update)

        Args:
            sha1s ([bytes]): sha1's identifier list
            policy_update ([str]): either 'update-dups' or
            'ignore-dups' to respectively update duplicates or ignore
            them

        """
        results = []
        try:
            for sha1 in sha1s:
                try:
                    raw_content = self.objstorage.get(sha1)
                except ObjNotFoundError:
                    self.log.warn('Content %s not found in objstorage' %
                                  hashutil.hash_to_hex(sha1))
                    continue
                res = self.index(sha1, raw_content)
                if res:  # If no results, skip it
                    results.append(res)

            self.persist_index_computations(results, policy_update)
            self.next_step(results)
        except Exception:
            self.log.exception(
                'Problem when reading contents metadata.')
            if self.rescheduling_task:
                self.log.warn('Rescheduling batch')
                self.rescheduling_task.delay(sha1s, policy_update)


class RevisionIndexer(BaseIndexer):
    """
-    An object type indexer, inherit from the BaseIndexer and
-    implement the process of indexation for Revisions with the run method
+    An object type indexer, inherits from the BaseIndexer and
+    implements the process of indexation for Revisions using the run
+    method

    Note: the RevisionIndexer is not an instantiable object;
-    to use it in another context one should refer to the instructions in the
-    BaseIndexer
+    to use it in another context one should inherit from this class and
+    override the methods mentioned in the BaseIndexer class

    """
    def run(self, sha1_gits, policy_update):
        """Given a list of sha1_gits:
        - retrieve revisions from storage
        - execute the indexing computations
        - store the results (according to policy_update)

        Args:
            sha1_gits ([bytes]): sha1_git's identifier list
            policy_update ([str]): either 'update-dups' or
            'ignore-dups' to respectively update duplicates or ignore
            them

        """
        results = []
-        try:
-            for sha1_git in sha1_gits:
-                try:
-                    revs = self.storage.revision_get([sha1_git])
-                except ValueError:
-                    self.log.warn('Revision %s not found in storage' %
-                                  hashutil.hash_to_hex(sha1_git))
-                    continue
-                for rev in revs:
-                    if rev:  # If no revision, skip it
-                        res = self.index(rev)
-                        if res:  # If no results, skip it
-                            results.append(res)
-            self.persist_index_computations(results, policy_update)
-        except Exception:
-            self.log.exception(
-                'Problem when processing revision')
+        revs = self.storage.revision_get(sha1_gits)
+
+        for sha1_git, rev in zip(sha1_gits, revs):
+            if not rev:
+                self.log.warn('Revision %s not found in storage' %
+                              hashutil.hash_to_hex(sha1_git))
+                continue
+            try:
+                res = self.index(rev)
+                if res:  # If no results, skip it
+                    results.append(res)
+            except Exception:
+                self.log.exception(
+                    'Problem when processing revision')
+        self.persist_index_computations(results, policy_update)
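
To make the extension points documented in BaseIndexer concrete, here
is a minimal sketch of a ContentIndexer subclass; the
content_size_missing / content_size_add storage endpoints and the
length computation are hypothetical, chosen only to mirror the shape
of the real indexers:

    from swh.indexer.indexer import ContentIndexer

    class ContentSizeIndexer(ContentIndexer):
        """Toy indexer recording each content's length."""

        def filter(self, sha1s):
            # hypothetical endpoint; real indexers use e.g.
            # content_mimetype_missing
            yield from self.storage.content_size_missing((
                {'id': sha1,
                 'indexer_configuration_id': self.tools['id']}
                for sha1 in sha1s
            ))

        def index(self, sha1, raw_content):
            return {
                'id': sha1,
                'indexer_configuration_id': self.tools['id'],
                'length': len(raw_content),
            }

        def persist_index_computations(self, results, policy_update):
            # hypothetical endpoint
            self.storage.content_size_add(
                results,
                conflict_update=(policy_update == 'update-dups'))
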
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
index 2491c0d..d5038cd 100644
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -1,262 +1,260 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.indexer.indexer import ContentIndexer, RevisionIndexer
from swh.indexer.metadata_dictionary import compute_metadata
from swh.indexer.metadata_detector import detect_metadata
from swh.indexer.metadata_detector import extract_minimal_metadata_dict
from swh.model import hashutil


class ContentMetadataIndexer(ContentIndexer):
    """Indexer at content level in charge of:
    - filtering out content already indexed in content_metadata
    - reading content from objstorage with the content's id sha1
    - computing translated_metadata by given context
    - using the metadata_dictionary as the 'swh-metadata-translator'
      tool
    - storing the result in the content_metadata table

    """
    CONFIG_BASE_FILENAME = 'indexer/metadata'

    def __init__(self, tool):
        self.tool = tool
        super().__init__()

    def prepare(self):
        super().prepare()
        self.results = []

    def retrieve_tools_information(self):
        self.config['tools'] = self.tool
        return super().retrieve_tools_information()

    def filter(self, sha1s):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.content_metadata_missing((
            {
                'id': sha1,
                'indexer_configuration_id': self.tools['id'],
            } for sha1 in sha1s
        ))

    def index(self, sha1, raw_content):
        """Index sha1's content and store the result.

        Args:
            sha1 (bytes): content's identifier
            raw_content (bytes): raw content in bytes

        Returns:
            result (dict): representing a content_metadata; if the
            translation wasn't successful the translated_metadata key
            is kept as None

        """
        result = {
            'id': sha1,
            'indexer_configuration_id': self.tools['id'],
            'translated_metadata': None
        }
        try:
            context = self.tools['tool_configuration']['context']
            result['translated_metadata'] = compute_metadata(
                context, raw_content)
            # keep the result on the indexer object so get_results can
            # return it
            self.results.append(result)
        except Exception:
            self.log.exception(
                "Problem during tool retrieval of metadata translation")
        return result

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of content_metadata, dict with the
            following keys:
            - id (bytes): content's identifier (sha1)
            - translated_metadata (jsonb): detected metadata
            policy_update ([str]): either 'update-dups' or
            'ignore-dups' to respectively update duplicates or ignore
            them

        """
        self.storage.content_metadata_add(
            results, conflict_update=(policy_update == 'update-dups'))

    def get_results(self):
        """Can only be called after the run method has been called.

        Returns:
            results (list): list of content_metadata entries computed
            by the current indexer

        """
        return self.results
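
# Illustration (not part of this patch): translate_revision_metadata
# below drives this content-level indexer directly when a content has
# no stored metadata yet, roughly as follows:
#
#     c_metadata_indexer = ContentMetadataIndexer(tool)
#     c_metadata_indexer.run([sha1], policy_update='ignore-dups')
#     local_metadata = c_metadata_indexer.get_results()
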
class RevisionMetadataIndexer(RevisionIndexer):
    """Indexer at Revision level in charge of:
    - filtering revisions already indexed in the revision_metadata
      table with the defined computation tool
    - retrieving all entry_files in the root directory
    - using metadata_detector for file_names containing metadata
    - computing the metadata translation if necessary and possible
      (depends on tool)
    - sending sha1s to content indexing if possible
    - storing the results for the revision

    """
    CONFIG_BASE_FILENAME = 'indexer/metadata'

    ADDITIONAL_CONFIG = {
        'tools': ('dict', {
            'name': 'swh-metadata-detector',
            'version': '0.0.1',
            'configuration': {
                'type': 'local',
                'context': ['npm', 'codemeta']
            },
        }),
    }

    def prepare(self):
        super().prepare()

    def filter(self, sha1_gits):
        """Filter out known sha1s and return only missing ones.

        """
        yield from self.storage.revision_metadata_missing((
            {
                'id': sha1_git,
                'indexer_configuration_id': self.tools['id'],
            } for sha1_git in sha1_gits
        ))

    def index(self, rev):
        """Index rev by processing it and organizing the result.

        Use metadata_detector to iterate on filenames:
        - if one filename is detected -> send the file to the content
          indexer
        - if multiple files are detected -> translation is needed at
          revision level

        Args:
            rev (dict): revision artifact from storage

        Returns:
            A dict, representing a revision_metadata, with keys:
            - id (bytes): rev's identifier (sha1_git)
            - indexer_configuration_id (bytes): tool used
            - translated_metadata (bytes): dict of retrieved metadata

        """
        try:
            result = {
                'id': rev['id'],
                'indexer_configuration_id': self.tools['id'],
                'translated_metadata': None
            }

            root_dir = rev['directory']
            dir_ls = self.storage.directory_ls(root_dir, recursive=False)
            files = (entry for entry in dir_ls if entry['type'] == 'file')
            detected_files = detect_metadata(files)
            result['translated_metadata'] = self.translate_revision_metadata(
                detected_files)
        except Exception:
            self.log.exception(
                'Problem when indexing rev')
        return result

    def persist_index_computations(self, results, policy_update):
        """Persist the results in storage.

        Args:
            results ([dict]): list of revision_metadata, dict with the
            following keys:
            - id (bytes): revision's identifier (sha1_git)
            - translated_metadata (jsonb): detected metadata
            policy_update ([str]): either 'update-dups' or
            'ignore-dups' to respectively update duplicates or ignore
            them

        """
        # TODO: add functions in storage to keep data in
        # revision_metadata
-        # self.storage.reivision_metadata_add(
-        #     results, conflict_update=(policy_update == 'update-dups'))
-        pass
+        self.storage.revision_metadata_add(
+            results, conflict_update=(policy_update == 'update-dups'))

    def translate_revision_metadata(self, detected_files):
        """Determine the plan of action for translating metadata, given
        one or multiple detected files:

        Args:
            - detected_files: dict with context name and list of sha1s
              (e.g: {'npm': [sha1_1, sha1_2], 'authors': sha1_3})

        Returns:
            - translated_metadata: dict with the CodeMeta vocabulary

        """
        translated_metadata = []
        tool = {
            'name': 'swh-metadata-translator',
            'version': '0.0.1',
            'configuration': {
                'type': 'local',
                'context': None
            },
        }
        # TODO: iterate on each context, on each file
        # -> get raw_contents
        # -> translate each content
        for context in detected_files.keys():
            tool['configuration']['context'] = context
            for sha1 in detected_files[context]:
                local_metadata = {}
                # fetch content_metadata from storage
                metadata_generator = self.storage.content_metadata_get(
                    [sha1])
                metadata_generated = False
                for c in metadata_generator:
-                    # print(c)
                    metadata_generated = True
                    # extracting translated_metadata
                    local_metadata = c['translated_metadata']

                if not metadata_generated:
                    # schedule indexation of content
                    try:
                        c_metadata_indexer = ContentMetadataIndexer(tool)
                        c_metadata_indexer.run([sha1],
                                               policy_update='ignore-dups')
                        local_metadata = c_metadata_indexer.get_results()
                    except Exception:
                        self.log.exception(
                            'indexing content %s with '
                            'ContentMetadataIndexer raised an exception' %
                            hashutil.hash_to_hex(sha1))

                # local metadata is aggregated
                if local_metadata:
                    translated_metadata.append(local_metadata)

        # transform translated_metadata into a minimal set with
        # swh-metadata-detector
        min_metadata = extract_minimal_metadata_dict(translated_metadata)
        return min_metadata


def main():
    rev_metadata_indexer = RevisionMetadataIndexer()
    sha1_git1 = hashutil.hash_to_bytes(
        '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
    sha1_git2 = hashutil.hash_to_bytes(
        '026040ea79dec1b49b4e3e7beda9132b6b26b51b')
    sha1_git3 = hashutil.hash_to_bytes(
        '9699072e21eded4be8d45e3b8d543952533fa190')

    sha1_gits = [sha1_git1, sha1_git2, sha1_git3]
    rev_metadata_indexer.run(sha1_gits, 'update-dups')


if __name__ == '__main__':
    main()
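
persist_index_computations above now calls
self.storage.revision_metadata_add, which the TODO notes is not yet
implemented on the storage side. As a reference for what that endpoint
is expected to do, here is an in-memory sketch mirroring the
MockStorage used by the tests; the names and semantics are assumptions
until the real storage functions land:

    class InMemoryRevisionMetadataStore:
        """Sketch of the revision_metadata endpoints' expected
        behaviour (assumed, not the real backend)."""
        def __init__(self):
            self._rows = {}  # id -> revision_metadata row

        def revision_metadata_add(self, metadata, conflict_update=False):
            for row in metadata:
                if conflict_update or row['id'] not in self._rows:
                    self._rows[row['id']] = row

        def revision_metadata_missing(self, rows):
            for row in rows:
                if row['id'] not in self._rows:
                    yield row['id']
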
- """ - def content_metadata_get(self, sha1s): - yield - - def content_metadata_add(self, metadata, conflict_update=None): - self.state = metadata - self.conflict_update = conflict_update - - def revision_metadata_add(self, metadata, conflict_update=None): - self.state = metadata - self.conflict_update = conflict_update - - def indexer_configuration_get(self, tool): - if tool['tool_name'] == 'swh-metadata-translator': - return { - 'id': 30, - 'tool_name': 'swh-metadata-translator', - 'tool_version': '0.0.1', - 'tool_configuration': { - 'type': 'local', - 'context': 'npm' - }, - } - elif tool['tool_name'] == 'swh-metadata-detector': - return { - 'id': 7, - 'tool_name': 'swh-metadata-detector', - 'tool_version': '0.0.1', - 'tool_configuration': { - 'type': 'local', - 'context': 'npm' - }, - } +from swh.indexer.tests.test_utils import MockStorage class TestContentMetadataIndexer(ContentMetadataIndexer): """Specific Metadata whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'rescheduling_task': None, } self.storage = MockStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.task_destination = None self.rescheduling_task = self.config['rescheduling_task'] self.tools = self.retrieve_tools_information() self.results = [] class TestRevisionMetadataIndexer(RevisionMetadataIndexer): """Specific indexer whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'rescheduling_task': None, + 'tools': { + 'name': 'swh-metadata-detector', + 'version': '0.0.1', + 'configuration': { + 'type': 'local', + 'context': 'npm' + } + } } self.storage = MockStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.task_destination = None self.rescheduling_task = self.config['rescheduling_task'] self.tools = self.retrieve_tools_information() self.results = [] class Metadata(unittest.TestCase): """ Tests metadata_mock_tool tool for Metadata detection """ def setUp(self): """ shows the entire diff in the results """ self.maxDiff = None self.content_tool = { 'name': 'swh-metadata-translator', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': 'npm' } } - self.revision_tool = { - 'name': 'swh-metadata-detector', - 'version': '0.0.1', - 'configuration': { - 'type': 'local', - 'context': 'npm' - } - } @istest def test_compute_metadata_none(self): """ testing content empty content is empty should return None """ # given content = b"" context = "npm" # None if no metadata was found or an error occurred declared_metadata = None # when result = compute_metadata(context, content) # then self.assertEqual(declared_metadata, result) @istest def test_compute_metadata_npm(self): """ testing only computation of metadata with hard_mapping_npm """ # given content = b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """ declared_metadata = { 'name': 'test_metadata', 'version': '0.0.1', 'description': 'Simple package.json test for indexer', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'other': {} } # when result = compute_metadata("npm", content) # then self.assertEqual(declared_metadata, result) @istest def test_index_content_metadata_npm(self): """ testing NPM with package.json - one sha1 uses a file that can't be translated to metadata and should return None in the 
translated metadata """ # given sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5', 'd4c647f0fc257591cc9ba1722484229780d1c607', '02fb2c89e14f7fab46701478c83779c7beb7b069'] # this metadata indexer computes only metadata for package.json # in npm context with a hard mapping metadata_indexer = TestContentMetadataIndexer(self.content_tool) # when metadata_indexer.run(sha1s, policy_update='ignore-dups') results = metadata_indexer.storage.state expected_results = [{ 'indexer_configuration_id': 30, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' }, { 'indexer_configuration_id': 30, 'translated_metadata': { 'softwareRequirements': { 'JSONStream': '~1.3.1', 'abbrev': '~1.1.0', 'ansi-regex': '~2.1.1', 'ansicolors': '~0.3.2', 'ansistyles': '~0.1.3' }, 'issueTracker': { 'url': 'https://github.com/npm/npm/issues' }, 'author': 'Isaac Z. Schlueter (http://blog.izs.me)', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/npm/npm' }, 'description': 'a package manager for JavaScript', 'softwareSuggestions': { 'tacks': '~1.2.6', 'tap': '~10.3.2' }, 'license': 'Artistic-2.0', 'version': '5.0.3', 'other': { 'preferGlobal': True, 'config': { 'publishtest': False } }, 'name': 'npm', 'keywords': [ 'install', 'modules', 'package manager', 'package.json' ], 'url': 'https://docs.npmjs.com/' }, 'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' }, { 'indexer_configuration_id': 30, 'translated_metadata': None, 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069' }] # The assertion bellow returns False sometimes because of nested lists self.assertEqual(expected_results, results) + + @istest + def test_detect_metadata_package_json(self): + # given + df = [{ + 'sha1_git': b'abc', + 'name': b'index.js', + 'target': b'abc', + 'length': 897, + 'status': 'visible', + 'type': 'file', + 'perms': 33188, + 'dir_id': b'dir_a', + 'sha1': b'bcd' + }, + { + 'sha1_git': b'aab', + 'name': b'package.json', + 'target': b'aab', + 'length': 712, + 'status': 'visible', + 'type': 'file', + 'perms': 33188, + 'dir_id': b'dir_a', + 'sha1': b'cde' + }] + # when + results = detect_metadata(df) + + expected_results = { + 'npm': [ + b'cde' + ] + } + # then + self.assertEqual(expected_results, results) + + @istest + def test_revision_metadata_indexer(self): + metadata_indexer = TestRevisionMetadataIndexer() + + sha1_gits = [ + b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', + ] + metadata_indexer.run(sha1_gits, 'update-dups') + + results = metadata_indexer.storage.state + + expected_results = [{ + 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', + 'translated_metadata': { + 'identifier': None, + 'maintainer': None, + 'url': [ + 'https://github.com/librariesio/yarn-parser#readme' + ], + 'author': ['Andrew Nesbitt'], + 'license': ['AGPL-3.0'], + 'version': ['1.0.0'], + 'description': [ + 'Tiny web service for parsing yarn.lock files' + ], + 'relatedLink': None, + 'developmentStatus': None, + 'operatingSystem': None, + 'issueTracker': [{ + 'url': 'https://github.com/librariesio/yarn-parser/issues' + }], + 'softwareRequirements': [{ + 'express': '^4.14.0', + 'yarn': '^0.21.0', + 'body-parser': '^1.15.2' + }], + 'name': ['yarn-parser'], + 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], + 'type': None, + 'email': None + }, + 'indexer_configuration_id': 7 + }] + # then + self.assertEqual(expected_results, results) 
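
The comment in test_index_content_metadata_npm notes that the equality
assertion sometimes fails because of ordering in nested lists. One
possible way to make the comparison deterministic, sketched here
rather than applied to the tests above, is to normalize both sides
before asserting:

    def normalize(results):
        # sort result rows by id so list ordering cannot break the
        # comparison
        return sorted(results, key=lambda r: r['id'])

    # in the test:
    #     self.assertEqual(normalize(expected_results),
    #                      normalize(results))
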
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
index aea186a..b6b340f 100644
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -1,113 +1,246 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.objstorage.exc import ObjNotFoundError


class MockStorageWrongConfiguration():
    def indexer_configuration_get(self, tool):
        return None


class MockObjStorage():
    """Mock objstorage with predefined contents.

    """
    def __init__(self):
        self.data = {
            '01c9379dfc33803963d07c1ccc748d3fe4c96bb50':
                b'this is some text',
            '688a5ef812c53907562fe379d4b3851e69c7cb15':
                b'another text',
            '8986af901dd2043044ce8f0d8fc039153641cf17':
                b'yet another text',
            '02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
                import unittest
                import logging
                from nose.tools import istest
                from swh.indexer.mimetype import ContentMimetypeIndexer
                from swh.indexer.tests.test_utils import MockObjStorage

                class MockStorage():
                    def content_mimetype_add(self, mimetypes):
                        self.state = mimetypes
                        self.conflict_update = conflict_update

                    def indexer_configuration_get(self, tool):
                        return {
                            'id': 10,
                        }
                """,
            '103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
                #ifndef __AVL__
                #define __AVL__

                typedef struct _avl_tree avl_tree;

                typedef struct _data_t {
                  int content;
                } data_t;
                """,
            '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
                (should 'pygments (recognize 'lisp 'easily))
                """,
            '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b"""
                {
                    "name": "test_metadata",
                    "version": "0.0.1",
                    "description": "Simple package.json test for indexer",
                    "repository": {
                        "type": "git",
                        "url": "https://github.com/moranegg/metadata_test"
                    }
                }
                """,
            'd4c647f0fc257591cc9ba1722484229780d1c607': b"""
                {
                    "version": "5.0.3",
                    "name": "npm",
                    "description": "a package manager for JavaScript",
                    "keywords": [
                        "install",
                        "modules",
                        "package manager",
                        "package.json"
                    ],
                    "preferGlobal": true,
                    "config": {
                        "publishtest": false
                    },
                    "homepage": "https://docs.npmjs.com/",
                    "author": "Isaac Z. Schlueter (http://blog.izs.me)",
                    "repository": {
                        "type": "git",
                        "url": "https://github.com/npm/npm"
                    },
                    "bugs": {
                        "url": "https://github.com/npm/npm/issues"
                    },
                    "dependencies": {
                        "JSONStream": "~1.3.1",
                        "abbrev": "~1.1.0",
                        "ansi-regex": "~2.1.1",
                        "ansicolors": "~0.3.2",
                        "ansistyles": "~0.1.3"
                    },
                    "devDependencies": {
                        "tacks": "~1.2.6",
                        "tap": "~10.3.2"
                    },
                    "license": "Artistic-2.0"
                }
                """,
            'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b"""
                """
        }

    def get(self, sha1):
        raw_content = self.data.get(sha1)
        if not raw_content:
            raise ObjNotFoundError()
        return raw_content
+
+
+class MockStorage():
+    """Mock storage to simplify reading indexers' outputs.
+    """
+    def content_metadata_add(self, metadata, conflict_update=None):
+        self.state = metadata
+        self.conflict_update = conflict_update
+
+    def revision_metadata_add(self, metadata, conflict_update=None):
+        self.state = metadata
+        self.conflict_update = conflict_update
+
+    def indexer_configuration_get(self, tool):
+        if tool['tool_name'] == 'swh-metadata-translator':
+            return {
+                'id': 30,
+                'tool_name': 'swh-metadata-translator',
+                'tool_version': '0.0.1',
+                'tool_configuration': {
+                    'type': 'local',
+                    'context': 'npm'
+                },
+            }
+        elif tool['tool_name'] == 'swh-metadata-detector':
+            return {
+                'id': 7,
+                'tool_name': 'swh-metadata-detector',
+                'tool_version': '0.0.1',
+                'tool_configuration': {
+                    'type': 'local',
+                    'context': 'npm'
+                },
+            }
+
+    def revision_get(self, revisions):
+        return [{
+            'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+            'committer': {
+                'id': 26,
+                'name': b'Andrew Nesbitt',
+                'fullname': b'Andrew Nesbitt ',
+                'email': b'andrewnez@gmail.com'
+            },
+            'synthetic': False,
+            'date': {
+                'negative_utc': False,
+                'timestamp': {
+                    'seconds': 1487596456,
+                    'microseconds': 0
+                },
+                'offset': 0
+            },
+            'directory': b'10'
+        }]
+
+    def directory_ls(self, directory, recursive=False, cur=None):
+        # with directory: b'\x9d',
+        return [{
+            'sha1_git': b'abc',
+            'name': b'index.js',
+            'target': b'abc',
+            'length': 897,
+            'status': 'visible',
+            'type': 'file',
+            'perms': 33188,
+            'dir_id': b'10',
+            'sha1': b'bcd'
+        },
+            {
+            'sha1_git': b'aab',
+            'name': b'package.json',
+            'target': b'aab',
+            'length': 712,
+            'status': 'visible',
+            'type': 'file',
+            'perms': 33188,
+            'dir_id': b'10',
+            'sha1': b'cde'
+        },
+            {
+            'dir_id': b'10',
+            'target': b'11',
+            'type': 'dir',
+            'length': None,
+            'name': b'.github',
+            'sha1': None,
+            'perms': 16384,
+            'sha1_git': None,
+            'status': None,
+            'sha256': None
+        }]
+
+    def content_metadata_get(self, sha1s):
+        return [{
+            'tool': {
+                'configuration': {
+                    'type': 'local',
+                    'context': 'npm'
+                },
+                'version': '0.0.1',
+                'id': 6,
+                'name': 'swh-metadata-translator'
+            },
+            'id': b'cde',
+            'translated_metadata': {
+                'issueTracker': {
+                    'url': 'https://github.com/librariesio/yarn-parser/issues'
+                },
+                'version': '1.0.0',
+                'name': 'yarn-parser',
+                'author': 'Andrew Nesbitt',
+                'url': 'https://github.com/librariesio/yarn-parser#readme',
+                'processorRequirements': {'node': '7.5'},
+                'other': {
+                    'scripts': {
+                        'start': 'node index.js'
+                    },
+                    'main': 'index.js'
+                },
+                'license': 'AGPL-3.0',
+                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
+                'codeRepository': {
+                    'type': 'git',
+                    'url': 'git+https://github.com/librariesio/yarn-parser.git'
+                },
+                'description': 'Tiny web service for parsing yarn.lock files',
+                'softwareRequirements': {
+                    'yarn': '^0.21.0',
+                    'express': '^4.14.0',
+                    'body-parser': '^1.15.2'}
+            }
+        }]