diff --git a/README b/README
--- a/README
+++ b/README
@@ -2,11 +2,14 @@
 ===========
 
 Tools to compute multiple indexes on SWH's raw contents:
-- mimetype
-- ctags
-- language
-- fossology-license
-
+- content:
+  - mimetype
+  - ctags
+  - language
+  - fossology-license
+  - metadata
+- revision:
+  - metadata
 
 # Context
@@ -50,13 +53,13 @@
 ## Indexers
 
 Indexers:
-- receive batch of sha1
-- retrieve the associated content from the blob storage
-- compute for that content some index
+- receive a batch of ids
+- retrieve the associated data depending on the object type
+- compute some index for that object
 - store the result to swh's storage
 - (and possibly do some broadcast itself)
 
-Current indexers:
+Current content indexers:
 
 - mimetype (queue swh_indexer_content_mimetype): compute the mimetype,
   filter out the textual contents and broadcast the list to the
@@ -69,3 +72,11 @@
 
 - fossology-license (queue swh_indexer_fossology_license): try and
   compute the license
+
+- metadata: translate a file into a translated_metadata dict
+
+Current revision indexers:
+
+- metadata: detect files containing metadata and retrieve their
+  translated_metadata from the content_metadata table in storage, or run
+  the content indexer to translate the files first.
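Usage sketch (an editor's aside, not part of the patch): the pipeline the README describes is what each content indexer's run method implements end to end. Driving one of the existing indexers by hand could look like the following, assuming a working indexer configuration so the indexer can reach its storage and objstorage; the sha1 is just an example value taken from the tests.

    from swh.model import hashutil
    from swh.indexer.mimetype import ContentMimetypeIndexer

    # one content identifier (sha1, as bytes)
    sha1s = [hashutil.hash_to_bytes(
        '02fb2c89e14f7fab46701478c83779c7beb7b069')]

    indexer = ContentMimetypeIndexer()
    # compute the mimetype index for the batch and store it,
    # updating duplicates
    indexer.run(sha1s, policy_update='update-dups')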
diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py
--- a/swh/indexer/ctags.py
+++ b/swh/indexer/ctags.py
@@ -10,7 +10,7 @@
 from swh.model import hashutil
 
 from .language import compute_language
-from .indexer import BaseIndexer, DiskIndexer
+from .indexer import ContentIndexer, DiskIndexer
 
 
 # Options used to compute tags
@@ -54,7 +54,7 @@
 }
 
 
-class CtagsIndexer(BaseIndexer, DiskIndexer):
+class CtagsIndexer(ContentIndexer, DiskIndexer):
     CONFIG_BASE_FILENAME = 'indexer/ctags'
 
     ADDITIONAL_CONFIG = {
@@ -80,7 +80,7 @@
         self.working_directory = self.config['workdir']
         self.language_map = self.config['languages']
 
-    def filter_contents(self, sha1s):
+    def filter(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         """
@@ -91,7 +91,7 @@
             } for sha1 in sha1s
         ))
 
-    def index_content(self, sha1, raw_content):
+    def index(self, sha1, raw_content):
         """Index sha1s' content and store result.
 
         Args:
diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py
--- a/swh/indexer/fossology_license.py
+++ b/swh/indexer/fossology_license.py
@@ -8,7 +8,7 @@
 
 from swh.model import hashutil
 
-from .indexer import BaseIndexer, DiskIndexer
+from .indexer import ContentIndexer, DiskIndexer
 
 
 def compute_license(path, log=None):
@@ -46,7 +46,7 @@
 }
 
 
-class ContentFossologyLicenseIndexer(BaseIndexer, DiskIndexer):
+class ContentFossologyLicenseIndexer(ContentIndexer, DiskIndexer):
     """Indexer in charge of:
     - filtering out content already indexed
     - reading content from objstorage per the content's id (sha1)
@@ -71,7 +71,7 @@
         super().prepare()
         self.working_directory = self.config['workdir']
 
-    def filter_contents(self, sha1s):
+    def filter(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         """
@@ -82,7 +82,7 @@
             } for sha1 in sha1s
         ))
 
-    def index_content(self, sha1, raw_content):
+    def index(self, sha1, raw_content):
         """Index sha1s' content and store result.
 
         Args:
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -69,24 +69,34 @@
     """Base class for indexers to inherit from.
 
     The main entry point is the `run` functions which is in charge to
-    trigger the computations on the sha1s batch received.
+    trigger the computations on the ids batch received.
 
     Indexers can:
-    - filter out sha1 whose data has already been indexed.
-    - retrieve sha1's content from objstorage, index this content then
-      store the result in storage.
+    - filter out ids whose data has already been indexed.
+    - retrieve ids' data from storage or objstorage
+    - index this data depending on the object and store the result in storage.
 
-    To implement a new index, inherit from this class and implement
-    the following functions:
+    To implement a new object type indexer, inherit from BaseIndexer and
+    implement the indexing process:
 
-    - def filter_contents(self, sha1s): filter out data already
+    - def run(self, object_ids, policy_update): object_ids are different
+      depending on the object type. For example: sha1 for content, sha1_git
+      for revision, directory and release, and id for origin
+
+    To implement a new concrete indexer, inherit from the object level
+    classes: ContentIndexer, RevisionIndexer
+    (later on OriginIndexer will also be available)
+
+    Then you need to implement the following functions:
+
+    - def filter(self, ids): filter out data already
       indexed (in storage). This function is used by the
       orchestrator and not directly by the indexer
       (cf. swh.indexer.orchestrator.BaseOrchestratorIndexer).
 
-    - def index_content(self, sha1, raw_content): compute index on
-      sha1 with data raw_content (retrieved in the objstorage by the
-      sha1 key) and return the resulting index computation.
+    - def index(self, id, data): compute the index on
+      id with data (retrieved from the storage or the objstorage by the
+      id key) and return the resulting index computation.
 
     - def persist_index_computations(self, results, policy_update):
       persist the results of multiple index computations in the
@@ -212,25 +222,26 @@
         return self.storage.indexer_configuration_get(tool)
 
     @abc.abstractmethod
-    def filter_contents(self, sha1s):
-        """Filter missing sha1 for that particular indexer.
+    def filter(self, ids):
+        """Filter missing ids for that particular indexer.
 
         Args:
-            sha1s ([bytes]): list of contents' sha1
+            ids ([bytes]): list of ids
 
         Yields:
-            iterator of missing sha1
+            iterator of missing ids
 
         """
         pass
 
     @abc.abstractmethod
-    def index_content(self, sha1, content):
-        """Index computation for the sha1 and associated raw content.
+    def index(self, id, data):
+        """Index computation for the id and associated raw data.
 
         Args:
-            sha1 (bytes): sha1 identifier
-            content (bytes): sha1's raw content
+            id (bytes): identifier
+            data (bytes): id's data from storage or objstorage depending on
+              the object type
 
         Returns:
             a dict that makes sense for the persist_index_computations
@@ -245,7 +256,7 @@
 
         Args:
             results ([result]): List of results. One result is the
-                result of the index_content function.
+                result of the index function.
 
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                respectively update duplicates or ignore them
 
@@ -263,7 +274,7 @@
 
         Args:
             results ([result]): List of results (dict) as returned
-                by index_content function.
+                by the index function.
 
         Returns:
             None
 
@@ -271,6 +282,32 @@
         """
         pass
 
+    @abc.abstractmethod
+    def run(self, ids, policy_update):
+        """Given a list of ids:
+        - retrieves the data from the storage
+        - executes the indexing computations
+        - stores the results (according to policy_update)
+
+        Args:
+            ids ([bytes]): identifier list
+            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
+              respectively update duplicates or ignore them
+
+        """
+        pass
+
+
+class ContentIndexer(BaseIndexer):
+    """An object type indexer; inherits from BaseIndexer and implements
+    the indexing process for contents through its run method.
+
+    Note: ContentIndexer is not meant to be instantiated directly. To use
+    it in another context, inherit from this class and override the
+    methods mentioned in the BaseIndexer class.
+    """
+
     def run(self, sha1s, policy_update):
         """Given a list of sha1s:
         - retrieve the content from the storage
         - execute the indexing computations
         - store the results (according to policy_update)
@@ -292,7 +329,7 @@
                 self.log.warn('Content %s not found in objstorage' %
                               hashutil.hash_to_hex(sha1))
                 continue
-            res = self.index_content(sha1, raw_content)
+            res = self.index(sha1, raw_content)
             if res:  # If no results, skip it
                 results.append(res)
 
@@ -304,3 +341,43 @@
         if self.rescheduling_task:
             self.log.warn('Rescheduling batch')
             self.rescheduling_task.delay(sha1s, policy_update)
+
+
+class RevisionIndexer(BaseIndexer):
+    """An object type indexer; inherits from BaseIndexer and implements
+    the indexing process for revisions through its run method.
+
+    Note: RevisionIndexer is not meant to be instantiated directly. To use
+    it in another context, inherit from this class and override the
+    methods mentioned in the BaseIndexer class.
+    """
+
+    def run(self, sha1_gits, policy_update):
+        """Given a list of sha1_gits:
+        - retrieve revisions from storage
+        - execute the indexing computations
+        - store the results (according to policy_update)
+
+        Args:
+            sha1_gits ([bytes]): list of sha1_git identifiers
+            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
+              respectively update duplicates or ignore them
+
+        """
+        results = []
+        revs = self.storage.revision_get(sha1_gits)
+
+        for rev in revs:
+            if not rev:
+                self.log.warn('Revisions %s not found in storage' %
+                              list(map(hashutil.hash_to_hex, sha1_gits)))
+                continue
+            try:
+                res = self.index(rev)
+                if res:  # If no results, skip it
+                    results.append(res)
+            except Exception:
+                self.log.exception(
+                    'Problem when processing revision')
+        self.persist_index_computations(results, policy_update)
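Illustrative sketch (not part of the patch): a hypothetical minimal content indexer written against the interface above. The class name, the computed "index" and the configuration file name are made up for the example, and persist_index_computations only logs instead of writing to a real storage endpoint; a real indexer would also declare its ADDITIONAL_CONFIG and tools as the other indexers in this diff do.

    from swh.indexer.indexer import ContentIndexer

    class ContentSizeIndexer(ContentIndexer):
        """Toy indexer: records the length of each content."""
        CONFIG_BASE_FILENAME = 'indexer/size'   # hypothetical config file

        def filter(self, ids):
            # Normally: query storage for the ids not yet indexed.
            yield from ids

        def index(self, id, data):
            # Compute this object's "index": here, just its size in bytes.
            return {'id': id, 'length': len(data)}

        def persist_index_computations(self, results, policy_update):
            # A real indexer would write to a storage endpoint here.
            self.log.info('would persist %s results (%s)',
                          len(results), policy_update)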
diff --git a/swh/indexer/language.py b/swh/indexer/language.py
--- a/swh/indexer/language.py
+++ b/swh/indexer/language.py
@@ -10,7 +10,7 @@
 from pygments.util import ClassNotFound
 from chardet.universaldetector import UniversalDetector
 
-from .indexer import BaseIndexer
+from .indexer import ContentIndexer
 
 
 def _cleanup_classname(classname):
@@ -107,7 +107,7 @@
 }
 
 
-class ContentLanguageIndexer(BaseIndexer):
+class ContentLanguageIndexer(ContentIndexer):
     """Indexer in charge of:
     - filtering out content already indexed
     - reading content from objstorage per the content's id (sha1)
@@ -134,7 +134,7 @@
         c = self.config
         self.max_content_size = c['tools']['configuration']['max_content_size']
 
-    def filter_contents(self, sha1s):
+    def filter(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         """
@@ -145,7 +145,7 @@
             } for sha1 in sha1s
         ))
 
-    def index_content(self, sha1, raw_content):
+    def index(self, sha1, raw_content):
         """Index sha1s' content and store result.
 
         Args:
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -2,36 +2,46 @@
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import click
 
-from .indexer import BaseIndexer
+from swh.indexer.indexer import ContentIndexer, RevisionIndexer
 from swh.indexer.metadata_dictionary import compute_metadata
+from swh.indexer.metadata_detector import detect_metadata
+from swh.indexer.metadata_detector import extract_minimal_metadata_dict
+from swh.model import hashutil
 
 
-class ContentMetadataIndexer(BaseIndexer):
-    """Indexer in charge of:
-    - filtering out content already indexed
+class ContentMetadataIndexer(ContentIndexer):
+    """Indexer at content level in charge of:
+    - filtering out content already indexed in content_metadata
     - reading content from objstorage with the content's id sha1
     - computing translated_metadata by given context
-    - using the MetadataDict and a tool for each context
-    - store result in storage
+    - using the metadata_dictionary as the 'swh-metadata-translator' tool
+    - storing the result in the content_metadata table
 
     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'
 
-    ADDITIONAL_CONFIG = {
-        'tools': ('dict', {
-            'name': 'swh-metadata-translator',
-            'version': '0.0.1',
-            'configuration': {
-                'type': 'local',
-                'context': 'npm'
-            },
-        }),
-    }
+    def __init__(self, tool, config):
+        self.tool = tool
+        # reuse the exact same configuration as the RevisionMetadataIndexer
+        # that internally drives this ContentMetadataIndexer
+        self.new_config = config
+        super().__init__()
 
     def prepare(self):
         super().prepare()
+        self.results = []
+        if self.new_config['storage']:
+            self.storage = self.new_config['storage']
+        if self.new_config['objstorage']:
+            self.objstorage = self.new_config['objstorage']
+
+    def retrieve_tools_information(self):
+        self.config['tools'] = self.tool
+        return super().retrieve_tools_information()
 
-    def filter_contents(self, sha1s):
+    def filter(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         """
         yield from self.storage.content_metadata_missing((
@@ -41,7 +51,7 @@
             } for sha1 in sha1s
         ))
 
-    def index_content(self, sha1, raw_content):
+    def index(self, sha1, raw_content):
         """Index sha1s' content and store result.
 
         Args:
@@ -60,9 +70,11 @@
             'translated_metadata': None
         }
         try:
-            context = self.tools['configuration']['context']
+            context = self.tools['tool_configuration']['context']
             result['translated_metadata'] = compute_metadata(
                 context, raw_content)
+            # keep the result on the indexer object for get_results()
+            self.results.append(result)
         except:
             self.log.exception(
                 "Problem during tool retrieval of metadata translation")
@@ -82,3 +94,188 @@
         """
         self.storage.content_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))
+
+    def get_results(self):
+        """Return the results computed by this indexer.
+
+        Can only be called after the run method has been called.
+
+        Returns:
+            results (list): list of content_metadata entries computed
+            by the current indexer
+        """
+        return self.results
+
+
+class RevisionMetadataIndexer(RevisionIndexer):
+    """Indexer at revision level in charge of:
+    - filtering out revisions already indexed in the revision_metadata
+      table with the defined computation tool
+    - retrieving all entry_files in the root directory
+    - using metadata_detector for file_names containing metadata
+    - computing metadata translation if necessary and possible
+      (depending on the tool)
+    - sending sha1s to content indexing if possible
+    - storing the results for the revision
+
+    """
+    CONFIG_BASE_FILENAME = 'indexer/metadata'
+
+    ADDITIONAL_CONFIG = {
+        'tools': ('dict', {
+            'name': 'swh-metadata-detector',
+            'version': '0.0.1',
+            'configuration': {
+                'type': 'local',
+                'context': ['npm', 'codemeta']
+            },
+        }),
+    }
+
+    def prepare(self):
+        super().prepare()
+
+    def filter(self, sha1_gits):
+        """Filter out known sha1_gits and return only missing ones.
+
+        """
+        yield from self.storage.revision_metadata_missing((
+            {
+                'id': sha1_git,
+                'indexer_configuration_id': self.tools['id'],
+            } for sha1_git in sha1_gits
+        ))
+
+    def index(self, rev):
+        """Index rev by processing it and organizing the result.
+
+        Uses metadata_detector to iterate on filenames:
+        - if one filename is detected -> sends the file to the content
+          indexer
+        - if multiple filenames are detected -> translation is needed at
+          the revision level
+
+        Args:
+            rev (dict): revision artifact from storage
+
+        Returns:
+            A dict, representing a revision_metadata, with keys:
+            - id (bytes): rev's identifier (sha1_git)
+            - indexer_configuration_id (int): tool used
+            - translated_metadata (dict): dict of retrieved metadata
+
+        """
+        try:
+            result = {
+                'id': rev['id'],
+                'indexer_configuration_id': self.tools['id'],
+                'translated_metadata': None
+            }
+
+            root_dir = rev['directory']
+            dir_ls = self.storage.directory_ls(root_dir, recursive=False)
+            files = (entry for entry in dir_ls if entry['type'] == 'file')
+            detected_files = detect_metadata(files)
+            result['translated_metadata'] = self.translate_revision_metadata(
+                detected_files)
+        except Exception:
+            self.log.exception(
+                'Problem when indexing rev')
+        return result
+
+    def persist_index_computations(self, results, policy_update):
+        """Persist the results in storage.
+
+        Args:
+            results ([dict]): list of revision_metadata dicts with the
+            following keys:
+            - id (bytes): revision's identifier (sha1_git)
+            - indexer_configuration_id (int): tool used
+            - translated_metadata (dict): metadata translated into the
+              CodeMeta vocabulary
+            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
+              respectively update duplicates or ignore them
+
+        """
+        # TODO: add functions in storage to keep data in revision_metadata
+        self.storage.revision_metadata_add(
+            results, conflict_update=(policy_update == 'update-dups'))
+
+    def translate_revision_metadata(self, detected_files):
+        """Determine the plan of action to translate metadata when one or
+        multiple detected files are found.
+
+        Args:
+            detected_files (dict): dict mapping context names to lists of
+            sha1s (e.g. {'npm': [sha1_1, sha1_2],
+                         'authors': [sha1_3]})
+
+        Returns:
+            translated_metadata (dict): dict using the CodeMeta vocabulary
+
+        """
+        translated_metadata = []
+        tool = {
+            'name': 'swh-metadata-translator',
+            'version': '0.0.1',
+            'configuration': {
+                'type': 'local',
+                'context': None
+            },
+        }
+        # TODO: iterate on each context, on each file
+        # -> get raw_contents
+        # -> translate each content
+        config = {
+            'storage': self.storage,
+            'objstorage': self.objstorage
+        }
+        for context in detected_files.keys():
+            tool['configuration']['context'] = context
+            c_metadata_indexer = ContentMetadataIndexer(tool, config)
+            # sha1s that are already in the content_metadata table
+            sha1s_in_storage = []
+            metadata_generator = self.storage.content_metadata_get(
+                detected_files[context])
+            for c in metadata_generator:
+                # extracting translated_metadata
+                sha1 = c['id']
+                sha1s_in_storage.append(sha1)
+                local_metadata = c['translated_metadata']
+                # local metadata is aggregated
+                if local_metadata:
+                    translated_metadata.append(local_metadata)
+
+            sha1s_filtered = [item for item in detected_files[context]
+                              if item not in sha1s_in_storage]
+
+            if sha1s_filtered:
+                self.log.debug('Contents to index: %s' % sha1s_filtered)
+                # schedule indexing of the missing contents
+                try:
+                    c_metadata_indexer.run(sha1s_filtered,
+                                           policy_update='ignore-dups')
+                    # on-the-fly possibility:
+                    results = c_metadata_indexer.get_results()
+
+                    for result in results:
+                        local_metadata = result['translated_metadata']
+                        translated_metadata.append(local_metadata)
+
+                except Exception as e:
+                    self.log.warn('Exception while indexing content: %s', e)
+
+        # reduce translated_metadata to a minimal set with
+        # swh-metadata-detector
+        min_metadata = extract_minimal_metadata_dict(translated_metadata)
+        return min_metadata
+
+
+@click.command()
+@click.option('--revs_ids',
+              default=['8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+                       '026040ea79dec1b49b4e3e7beda9132b6b26b51b',
+                       '9699072e21eded4be8d45e3b8d543952533fa190'],
+              help='Default sha1_gits to lookup')
+def main(revs_ids):
+    _git_sha1s = list(map(hashutil.hash_to_bytes, revs_ids))
+    rev_metadata_indexer = RevisionMetadataIndexer()
+    rev_metadata_indexer.run(_git_sha1s, 'update-dups')
+
+
+if __name__ == '__main__':
+    import logging
+    logging.basicConfig(level=logging.INFO)
+    main()
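The two helpers used above, detect_metadata and extract_minimal_metadata_dict, are defined in the new swh/indexer/metadata_detector.py (next file in this diff). A small usage sketch of how they compose; the entries and sha1 values are made up, borrowed from the test fixtures:

    from swh.indexer.metadata_detector import (
        detect_metadata, extract_minimal_metadata_dict)

    # directory entries, as storage.directory_ls() would return them
    entries = [
        {'type': 'file', 'name': b'package.json', 'sha1': b'cde'},
        {'type': 'file', 'name': b'index.js', 'sha1': b'bcd'},
    ]
    detect_metadata(entries)
    # -> {'npm': [b'cde']}

    # merge two per-file translations into one revision-level dict
    extract_minimal_metadata_dict([
        {'name': 'yarn-parser', 'version': '1.0.0'},
        {'author': 'Andrew Nesbitt'},
    ])
    # -> {'name': ['yarn-parser'], 'version': ['1.0.0'],
    #     'author': ['Andrew Nesbitt'], all remaining terms: None}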
diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_detector.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2017  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+mapping_filenames = {
+    b"package.json": "npm",
+    b"codemeta.json": "codemeta"
+}
+
+
+def detect_metadata(files):
+    """Detect the files potentially containing metadata.
+
+    Args:
+        files (list): list of file entries, as returned by directory_ls
+
+    Returns:
+        - an empty dict if nothing was found
+        - a dict {mapping_filenames[name]: [f['sha1'], ...]} otherwise
+
+    """
+    results = {}
+    for f in files:
+        name = f['name'].lower().strip()
+        # TODO: possibility to detect extensions
+        if name in mapping_filenames:
+            tool = mapping_filenames[name]
+            if tool in results:
+                results[tool].append(f['sha1'])
+            else:
+                results[tool] = [f['sha1']]
+    return results
+
+
+def extract_minimal_metadata_dict(metadata_list):
+    """Every item in metadata_list is a dict of translated_metadata in the
+    CodeMeta vocabulary. Extract a minimal set of terms and keep all the
+    values corresponding to each term.
+
+    Args:
+        metadata_list (list): list of dicts of translated_metadata
+
+    Returns:
+        minimal_dict (dict): one dict with the selected values of metadata
+
+    """
+    minimal_dict = {
+        "developmentStatus": [],
+        "version": [],
+        "operatingSystem": [],
+        "description": [],
+        "keywords": [],
+        "issueTracker": [],
+        "name": [],
+        "author": [],
+        "relatedLink": [],
+        "url": [],
+        "type": [],
+        "license": [],
+        "maintainer": [],
+        "email": [],
+        "softwareRequirements": [],
+        "identifier": [],
+        "codeRepository": []
+    }
+    for term in minimal_dict.keys():
+        for metadata_dict in metadata_list:
+            if term in metadata_dict:
+                minimal_dict[term].append(metadata_dict[term])
+        if not minimal_dict[term]:
+            minimal_dict[term] = None
+    return minimal_dict
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -8,7 +8,7 @@
 from subprocess import Popen, PIPE
 
 from swh.scheduler import utils
-from .indexer import BaseIndexer
+from .indexer import ContentIndexer
 
 
 def compute_mimetype_encoding(raw_content):
@@ -35,7 +35,7 @@
 }
 
 
-class ContentMimetypeIndexer(BaseIndexer):
+class ContentMimetypeIndexer(ContentIndexer):
     """Indexer in charge of:
     - filtering out content already indexed
    - reading content from objstorage per the content's id (sha1)
@@ -67,7 +67,7 @@
         self.task_destination = None
         self.tools = self.retrieve_tools_information()
 
-    def filter_contents(self, sha1s):
+    def filter(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         """
@@ -78,7 +78,7 @@
             } for sha1 in sha1s
         ))
 
-    def index_content(self, sha1, raw_content):
+    def index(self, sha1, raw_content):
         """Index sha1s' content and store result.
 
         Args:
diff --git a/swh/indexer/orchestrator.py b/swh/indexer/orchestrator.py
--- a/swh/indexer/orchestrator.py
+++ b/swh/indexer/orchestrator.py
@@ -93,22 +93,22 @@
         self.indexers = indexers
         self.tasks = tasks
 
-    def run(self, sha1s):
+    def run(self, ids):
         for name, (idx_class, filtering, batch_size) in self.indexers.items():
             if filtering:
                 policy_update = 'ignore-dups'
                 indexer_class = get_class(idx_class)
-                sha1s_filtered = list(indexer_class().filter_contents(sha1s))
-                if not sha1s_filtered:
+                ids_filtered = list(indexer_class().filter(ids))
+                if not ids_filtered:
                     continue
             else:
                 policy_update = 'update-dups'
-                sha1s_filtered = sha1s
+                ids_filtered = ids
 
             celery_tasks = []
-            for sha1s_to_send in grouper(sha1s_filtered, batch_size):
+            for ids_to_send in grouper(ids_filtered, batch_size):
                 celery_task = self.tasks[name].s(
-                    sha1s=list(sha1s_to_send),
+                    ids=list(ids_to_send),
                     policy_update=policy_update)
                 celery_tasks.append(celery_task)
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -8,38 +8,39 @@
 from nose.tools import istest
 
 from swh.indexer.metadata_dictionary import compute_metadata
+from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata import ContentMetadataIndexer
+from swh.indexer.metadata import RevisionMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage
+from swh.indexer.tests.test_utils import MockStorage
 
 
-class MockStorage():
-    """Mock storage to simplify reading indexers' outputs.
+class TestContentMetadataIndexer(ContentMetadataIndexer):
+    """Specific metadata indexer whose configuration is enough to satisfy
+    the indexing tests.
     """
-    def content_metadata_add(self, metadata, conflict_update=None):
-        self.state = metadata
-        self.conflict_update = conflict_update
-
-    def indexer_configuration_get(self, tool):
-        return {
-            'id': 30,
-            'name': 'hard_mapping_npm',
-            'version': '0.1',
-            'configuration': {
-                'type': 'local',
-                'context': 'npm'
-            },
+    def prepare(self):
+        self.config = {
+            'rescheduling_task': None,
         }
+        self.storage = MockStorage()
+        self.log = logging.getLogger('swh.indexer')
+        self.objstorage = MockObjStorage()
+        self.task_destination = None
+        self.rescheduling_task = self.config['rescheduling_task']
+        self.tools = self.retrieve_tools_information()
+        self.results = []
 
 
-class TestMetadataIndexer(ContentMetadataIndexer):
-    """Specific Metadata whose configuration is enough to satisfy the
+class TestRevisionMetadataIndexer(RevisionMetadataIndexer):
+    """Specific indexer whose configuration is enough to satisfy the
     indexing tests.
""" def prepare(self): self.config = { 'rescheduling_task': None, - 'tools': { - 'name': 'swh-metadata-translator', + 'tools': { + 'name': 'swh-metadata-detector', 'version': '0.0.1', 'configuration': { 'type': 'local', @@ -53,6 +54,7 @@ self.task_destination = None self.rescheduling_task = self.config['rescheduling_task'] self.tools = self.retrieve_tools_information() + self.results = [] class Metadata(unittest.TestCase): @@ -64,6 +66,14 @@ shows the entire diff in the results """ self.maxDiff = None + self.content_tool = { + 'name': 'swh-metadata-translator', + 'version': '0.0.1', + 'configuration': { + 'type': 'local', + 'context': 'npm' + } + } @istest def test_compute_metadata_none(self): @@ -128,7 +138,7 @@ '02fb2c89e14f7fab46701478c83779c7beb7b069'] # this metadata indexer computes only metadata for package.json # in npm context with a hard mapping - metadata_indexer = TestMetadataIndexer() + metadata_indexer = TestContentMetadataIndexer(self.content_tool, None) # when metadata_indexer.run(sha1s, policy_update='ignore-dups') @@ -197,3 +207,89 @@ # The assertion bellow returns False sometimes because of nested lists self.assertEqual(expected_results, results) + + @istest + def test_detect_metadata_package_json(self): + # given + df = [{ + 'sha1_git': b'abc', + 'name': b'index.js', + 'target': b'abc', + 'length': 897, + 'status': 'visible', + 'type': 'file', + 'perms': 33188, + 'dir_id': b'dir_a', + 'sha1': b'bcd' + }, + { + 'sha1_git': b'aab', + 'name': b'package.json', + 'target': b'aab', + 'length': 712, + 'status': 'visible', + 'type': 'file', + 'perms': 33188, + 'dir_id': b'dir_a', + 'sha1': b'cde' + }] + # when + results = detect_metadata(df) + + expected_results = { + 'npm': [ + b'cde' + ] + } + # then + self.assertEqual(expected_results, results) + + @istest + def test_revision_metadata_indexer(self): + metadata_indexer = TestRevisionMetadataIndexer() + + sha1_gits = [ + b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', + ] + metadata_indexer.run(sha1_gits, 'update-dups') + + results = metadata_indexer.storage.state + + expected_results = [{ + 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', + 'translated_metadata': { + 'identifier': None, + 'maintainer': None, + 'url': [ + 'https://github.com/librariesio/yarn-parser#readme' + ], + 'codeRepository': [{ + 'type': 'git', + 'url': 'git+https://github.com/librariesio/yarn-parser.git' + }], + 'author': ['Andrew Nesbitt'], + 'license': ['AGPL-3.0'], + 'version': ['1.0.0'], + 'description': [ + 'Tiny web service for parsing yarn.lock files' + ], + 'relatedLink': None, + 'developmentStatus': None, + 'operatingSystem': None, + 'issueTracker': [{ + 'url': 'https://github.com/librariesio/yarn-parser/issues' + }], + 'softwareRequirements': [{ + 'express': '^4.14.0', + 'yarn': '^0.21.0', + 'body-parser': '^1.15.2' + }], + 'name': ['yarn-parser'], + 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], + 'type': None, + 'email': None + }, + 'indexer_configuration_id': 7 + }] + # then + self.assertEqual(expected_results, results) diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -101,8 +101,9 @@ "license": "Artistic-2.0" } + """, + 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" """ - } def get(self, sha1): @@ -110,3 +111,139 @@ if not raw_content: raise ObjNotFoundError() return raw_content + + +class MockStorage(): + """Mock storage to simplify reading indexers' outputs. 
+ """ + def content_metadata_missing(self, sha1s): + yield from [] + + def content_metadata_add(self, metadata, conflict_update=None): + self.state = metadata + self.conflict_update = conflict_update + + def revision_metadata_add(self, metadata, conflict_update=None): + self.state = metadata + self.conflict_update = conflict_update + + def indexer_configuration_get(self, tool): + if tool['tool_name'] == 'swh-metadata-translator': + return { + 'id': 30, + 'tool_name': 'swh-metadata-translator', + 'tool_version': '0.0.1', + 'tool_configuration': { + 'type': 'local', + 'context': 'npm' + }, + } + elif tool['tool_name'] == 'swh-metadata-detector': + return { + 'id': 7, + 'tool_name': 'swh-metadata-detector', + 'tool_version': '0.0.1', + 'tool_configuration': { + 'type': 'local', + 'context': 'npm' + }, + } + + def revision_get(self, revisions): + return [{ + 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', + 'committer': { + 'id': 26, + 'name': b'Andrew Nesbitt', + 'fullname': b'Andrew Nesbitt ', + 'email': b'andrewnez@gmail.com' + }, + 'synthetic': False, + 'date': { + 'negative_utc': False, + 'timestamp': { + 'seconds': 1487596456, + 'microseconds': 0 + }, + 'offset': 0 + }, + 'directory': b'10' + }] + + def directory_ls(self, directory, recursive=False, cur=None): + # with directory: b'\x9d', + return [{ + 'sha1_git': b'abc', + 'name': b'index.js', + 'target': b'abc', + 'length': 897, + 'status': 'visible', + 'type': 'file', + 'perms': 33188, + 'dir_id': b'10', + 'sha1': b'bcd' + }, + { + 'sha1_git': b'aab', + 'name': b'package.json', + 'target': b'aab', + 'length': 712, + 'status': 'visible', + 'type': 'file', + 'perms': 33188, + 'dir_id': b'10', + 'sha1': b'cde' + }, + { + 'dir_id': b'10', + 'target': b'11', + 'type': 'dir', + 'length': None, + 'name': b'.github', + 'sha1': None, + 'perms': 16384, + 'sha1_git': None, + 'status': None, + 'sha256': None + }] + + def content_metadata_get(self, sha1s): + return [{ + 'tool': { + 'configuration': { + 'type': 'local', + 'context': 'npm' + }, + 'version': '0.0.1', + 'id': 6, + 'name': 'swh-metadata-translator' + }, + 'id': b'cde', + 'translated_metadata': { + 'issueTracker': { + 'url': 'https://github.com/librariesio/yarn-parser/issues' + }, + 'version': '1.0.0', + 'name': 'yarn-parser', + 'author': 'Andrew Nesbitt', + 'url': 'https://github.com/librariesio/yarn-parser#readme', + 'processorRequirements': {'node': '7.5'}, + 'other': { + 'scripts': { + 'start': 'node index.js' + }, + 'main': 'index.js' + }, + 'license': 'AGPL-3.0', + 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], + 'codeRepository': { + 'type': 'git', + 'url': 'git+https://github.com/librariesio/yarn-parser.git' + }, + 'description': 'Tiny web service for parsing yarn.lock files', + 'softwareRequirements': { + 'yarn': '^0.21.0', + 'express': '^4.14.0', + 'body-parser': '^1.15.2'} + } + }]