diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
index 77fdfb9..8adff82 100644
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -1,294 +1,283 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import click
 import logging

 from swh.indexer.indexer import ContentIndexer, RevisionIndexer
 from swh.indexer.metadata_dictionary import compute_metadata
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.indexer.storage import INDEXER_CFG_KEY

 from swh.model import hashutil


 class ContentMetadataIndexer(ContentIndexer):
     """Content-level indexer

     This indexer is in charge of:

     - filtering out content already indexed in content_metadata
     - reading content from objstorage with the content's id sha1
     - computing translated_metadata for a given context
     - using the metadata_dictionary as the 'swh-metadata-translator' tool
     - storing the result in the content_metadata table

     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'

     def __init__(self, tool, config):
         # twisted way to use the exact same config of RevisionMetadataIndexer
         # object that uses internally ContentMetadataIndexer
         self.config = config
         self.config['tools'] = tool
         super().__init__()

-    def prepare(self):
-        self.results = []
-        if self.config[INDEXER_CFG_KEY]:
-            self.idx_storage = self.config[INDEXER_CFG_KEY]
-        if self.config['objstorage']:
-            self.objstorage = self.config['objstorage']
-        _log = logging.getLogger('requests.packages.urllib3.connectionpool')
-        _log.setLevel(logging.WARN)
-        self.log = logging.getLogger('swh.indexer')
-        self.tools = self.register_tools(self.config['tools'])
-        # NOTE: only one tool so far, change when no longer true
-        self.tool = self.tools[0]
-
     def filter(self, ids):
         """Filter out known sha1s and return only missing ones.
         """
         yield from self.idx_storage.content_metadata_missing((
             {
                 'id': sha1,
                 'indexer_configuration_id': self.tool['id'],
             } for sha1 in ids
         ))

     def index(self, id, data):
         """Index sha1s' content and store result.

         Args:
             id (bytes): content's identifier
             data (bytes): raw content in bytes

         Returns:
             dict: dictionary representing a content_metadata. If the
             translation wasn't successful, the translated_metadata key
             will be None.

         """
         result = {
             'id': id,
             'indexer_configuration_id': self.tool['id'],
             'translated_metadata': None
         }
         try:
             context = self.tool['tool_configuration']['context']
             result['translated_metadata'] = compute_metadata(context, data)
             # a twisted way to keep result with indexer object for get_results
             self.results.append(result)
         except Exception:
             self.log.exception(
                 "Problem during tool retrieval of metadata translation")
         return result

     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.

         Args:
             results ([dict]): list of content_metadata, dict with the
               following keys:

               - id (bytes): content's identifier (sha1)
               - translated_metadata (jsonb): detected metadata

             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
               respectively update duplicates or ignore them

         """
         self.idx_storage.content_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))

     def get_results(self):
         """Can only be called after the run method was called.

         Returns:
             list: list of content_metadata entries computed by the
             current indexer

         """
         return self.results


 class RevisionMetadataIndexer(RevisionIndexer):
     """Revision-level indexer

     This indexer is in charge of:

     - filtering revisions already indexed in the revision_metadata table
       with the defined computation tool
     - retrieving all entry_files in the root directory
     - using the metadata_detector on file_names containing metadata
     - computing the metadata translation if necessary and possible
       (depends on tool)
     - sending sha1s to content indexing if possible
     - storing the results for the revision

     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'

     ADDITIONAL_CONFIG = {
         'tools': ('dict', {
             'name': 'swh-metadata-detector',
             'version': '0.0.1',
             'configuration': {
                 'type': 'local',
                 'context': ['npm', 'codemeta']
             },
         }),
     }

+    ContentMetadataIndexer = ContentMetadataIndexer
+
     def prepare(self):
         super().prepare()
         self.tool = self.tools[0]

     def filter(self, sha1_gits):
         """Filter out known sha1s and return only missing ones.
         """
         yield from self.idx_storage.revision_metadata_missing((
             {
                 'id': sha1_git,
                 'indexer_configuration_id': self.tool['id'],
             } for sha1_git in sha1_gits
         ))

     def index(self, rev):
         """Index rev by processing it and organizing the result.

         Uses metadata_detector to iterate on filenames:

         - if one filename is detected -> sends the file to the content
           indexer
         - if multiple files are detected -> translation is needed at the
           revision level

         Args:
             rev (dict): revision artifact from storage

         Returns:
             dict: dictionary representing a revision_metadata, with keys:

             - id (bytes): rev's identifier (sha1_git)
             - indexer_configuration_id (bytes): tool used
             - translated_metadata (bytes): dict of retrieved metadata

         """
         result = {
             'id': rev['id'],
             'indexer_configuration_id': self.tool['id'],
             'translated_metadata': None
         }
         try:
             root_dir = rev['directory']
             dir_ls = self.storage.directory_ls(root_dir, recursive=False)
             files = (entry for entry in dir_ls if entry['type'] == 'file')
             detected_files = detect_metadata(files)
             result['translated_metadata'] = self.translate_revision_metadata(
                 detected_files)
         except Exception:
             self.log.exception(
                 'Problem when indexing rev')
         return result

     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.

         Args:
             results ([dict]): list of revision_metadata, dict with the
               following keys:

               - id (bytes): revision's identifier (sha1_git)
               - translated_metadata (jsonb): detected metadata

             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
               respectively update duplicates or ignore them

         """
         # TODO: add functions in storage to keep data in revision_metadata
         self.idx_storage.revision_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))

     def translate_revision_metadata(self, detected_files):
         """Determine how to translate metadata from one or multiple
         detected files.

         Args:
             detected_files (dict): dictionary mapping context names (e.g.,
               "npm", "authors") to lists of sha1

         Returns:
             dict: dict with translated metadata according to the CodeMeta
             vocabulary

         """
         translated_metadata = []
         tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.1',
             'configuration': {
                 'type': 'local',
                 'context': None
             },
         }
         # TODO: iterate on each context, on each file
         # -> get raw_contents
         # -> translate each content
         config = {
             INDEXER_CFG_KEY: self.idx_storage,
             'objstorage': self.objstorage
         }
         for context in detected_files.keys():
             tool['configuration']['context'] = context
-            c_metadata_indexer = ContentMetadataIndexer(tool, config)
+            c_metadata_indexer = self.ContentMetadataIndexer(tool, config)
             # sha1s that are in the content_metadata table
             sha1s_in_storage = []
             metadata_generator = self.idx_storage.content_metadata_get(
                 detected_files[context])
             for c in metadata_generator:
                 # extracting translated_metadata
                 sha1 = c['id']
                 sha1s_in_storage.append(sha1)
                 local_metadata = c['translated_metadata']
                 # local metadata is aggregated
                 if local_metadata:
                     translated_metadata.append(local_metadata)

             sha1s_filtered = [item for item in detected_files[context]
                               if item not in sha1s_in_storage]

             if sha1s_filtered:
                 # schedule indexation of content
                 try:
                     c_metadata_indexer.run(sha1s_filtered,
                                            policy_update='ignore-dups')
                     # on the fly possibility:
                     results = c_metadata_indexer.get_results()

                     for result in results:
                         local_metadata = result['translated_metadata']
                         translated_metadata.append(local_metadata)

                 except Exception as e:
                     self.log.warn('Exception while indexing content: %s', e)

         # transform translated_metadata into minimal set with swh-metadata-detector
         min_metadata = extract_minimal_metadata_dict(translated_metadata)
         return min_metadata


 @click.command()
 @click.option('--revs', '-i',
               default=['8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
                        '026040ea79dec1b49b4e3e7beda9132b6b26b51b',
                        '9699072e21eded4be8d45e3b8d543952533fa190'],
               help='Default sha1_git to lookup', multiple=True)
 def main(revs):
     _git_sha1s = list(map(hashutil.hash_to_bytes, revs))
     rev_metadata_indexer = RevisionMetadataIndexer()
     rev_metadata_indexer.run(_git_sha1s, 'update-dups')


 if __name__ == '__main__':
     logging.basicConfig(level=logging.INFO)
     main()
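
Note on the change above: the new ContentMetadataIndexer class attribute on
RevisionMetadataIndexer means translate_revision_metadata now instantiates the
content indexer through self.ContentMetadataIndexer rather than a hard-coded
class, so subclasses can inject a substitute (as the test file below does).
A minimal sketch of that injection pattern; all names here are illustrative
and not part of this patch:

    # Sketch of the class-attribute injection pattern introduced above.
    class DefaultContentIndexer:
        def run(self, ids):
            print('indexing %d contents for real' % len(ids))

    class RevisionIndexerSketch:
        # Subclasses override this attribute to substitute the
        # content-level indexer (e.g. with a mock in tests).
        ContentIndexer = DefaultContentIndexer

        def index(self, ids):
            # Instantiating via the attribute, not a hard-coded class,
            # is what makes the substitution possible.
            self.ContentIndexer().run(ids)

    class TestRevisionIndexerSketch(RevisionIndexerSketch):
        class ContentIndexer:
            def run(self, ids):
                print('mock-indexing %d contents' % len(ids))

    TestRevisionIndexerSketch().index([b'sha1'])  # uses the mock
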
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index 2423891..51913c7 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,363 +1,366 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import unittest
 import logging

 from nose.tools import istest

 from swh.indexer.metadata_dictionary import compute_metadata
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.indexer.metadata import ContentMetadataIndexer
 from swh.indexer.metadata import RevisionMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
 from swh.indexer.tests.test_utils import MockIndexerStorage


 class TestContentMetadataIndexer(ContentMetadataIndexer):
     """Specific metadata indexer whose configuration is enough to satisfy
        the indexing tests.
     """
     def prepare(self):
         self.config.update({
             'rescheduling_task': None,
         })
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.task_destination = None
         self.rescheduling_task = self.config['rescheduling_task']
         self.tools = self.register_tools(self.config['tools'])
         self.tool = self.tools[0]
         self.results = []


 class TestRevisionMetadataIndexer(RevisionMetadataIndexer):
     """Specific indexer whose configuration is enough to satisfy the
        indexing tests.
     """
+
+    ContentMetadataIndexer = TestContentMetadataIndexer
+
     def prepare(self):
         self.config = {
             'rescheduling_task': None,
             'storage': {
                 'cls': 'remote',
                 'args': {
                     'url': 'http://localhost:9999',
                 }
             },
             'tools': {
                 'name': 'swh-metadata-detector',
                 'version': '0.0.1',
                 'configuration': {
                     'type': 'local',
                     'context': 'npm'
                 }
             }
         }
         self.storage = MockStorage()
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.task_destination = None
         self.rescheduling_task = self.config['rescheduling_task']
         self.tools = self.register_tools(self.config['tools'])
         self.tool = self.tools[0]
         self.results = []


 class Metadata(unittest.TestCase):
     """
     Tests metadata_mock_tool tool for Metadata detection
     """
     def setUp(self):
         """
         shows the entire diff in the results
         """
         self.maxDiff = None
         self.content_tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.1',
             'configuration': {
                 'type': 'local',
                 'context': 'npm'
             }
         }

     @istest
     def test_compute_metadata_none(self):
         """
         Translating empty content should return None.
         """
         # given
         content = b""
         context = "npm"
         # None if no metadata was found or an error occurred
         declared_metadata = None
         # when
         result = compute_metadata(context, content)
         # then
         self.assertEqual(declared_metadata, result)

     @istest
     def test_compute_metadata_npm(self):
         """
         Testing only the computation of metadata with hard_mapping_npm.
         """
         # given
         content = b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.1",
                 "description": "Simple package.json test for indexer",
                 "repository": {
                     "type": "git",
                     "url": "https://github.com/moranegg/metadata_test"
                 }
             }
         """
         declared_metadata = {
             'name': 'test_metadata',
             'version': '0.0.1',
             'description': 'Simple package.json test for indexer',
             'codeRepository': {
                 'type': 'git',
                 'url': 'https://github.com/moranegg/metadata_test'
             },
             'other': {}
         }

         # when
         result = compute_metadata("npm", content)

         # then
         self.assertEqual(declared_metadata, result)

     @istest
     def test_extract_minimal_metadata_dict(self):
         """
         Test the creation of a coherent minimal metadata set
         """
         # given
         metadata_list = [{
             'name': 'test_1',
             'version': '0.0.1',
             'description': 'Simple package.json test for indexer',
             'codeRepository': {
                 'type': 'git',
                 'url': 'https://github.com/moranegg/metadata_test'
             },
             'other': {}
         }, {
             'name': 'test_0_1',
             'version': '0.0.1',
             'description': 'Simple package.json test for indexer',
             'codeRepository': {
                 'type': 'git',
                 'url': 'https://github.com/moranegg/metadata_test'
             },
             'other': {}
         }, {
             'name': 'test_metadata',
             'version': '0.0.1',
             'author': 'moranegg',
             'other': {}
         }]

         # when
         results = extract_minimal_metadata_dict(metadata_list)

         # then
         expected_results = {
             "developmentStatus": None,
             "version": ['0.0.1'],
             "operatingSystem": None,
             "description": ['Simple package.json test for indexer'],
             "keywords": None,
             "issueTracker": None,
             "name": ['test_1', 'test_0_1', 'test_metadata'],
             "author": ['moranegg'],
             "relatedLink": None,
             "url": None,
             "license": None,
             "maintainer": None,
             "email": None,
             "softwareRequirements": None,
             "identifier": None,
             "codeRepository": [{
                 'type': 'git',
                 'url': 'https://github.com/moranegg/metadata_test'
             }]
         }
         self.assertEqual(expected_results, results)

     @istest
     def test_index_content_metadata_npm(self):
         """
         Testing NPM with package.json:
         - one sha1 uses a file that can't be translated to metadata and
           should return None in the translated metadata
         """
         # given
         sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5',
                  'd4c647f0fc257591cc9ba1722484229780d1c607',
                  '02fb2c89e14f7fab46701478c83779c7beb7b069']
         # this metadata indexer computes only metadata for package.json
         # in npm context with a hard mapping
         metadata_indexer = TestContentMetadataIndexer(
             tool=self.content_tool, config={})

         # when
         metadata_indexer.run(sha1s, policy_update='ignore-dups')
         results = metadata_indexer.idx_storage.state

         expected_results = [{
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 'other': {},
                 'codeRepository': {
                     'type': 'git',
                     'url': 'https://github.com/moranegg/metadata_test'
                 },
                 'description': 'Simple package.json test for indexer',
                 'name': 'test_metadata',
                 'version': '0.0.1'
             },
             'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'
         }, {
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 'softwareRequirements': {
                     'JSONStream': '~1.3.1',
                     'abbrev': '~1.1.0',
                     'ansi-regex': '~2.1.1',
                     'ansicolors': '~0.3.2',
                     'ansistyles': '~0.1.3'
                 },
                 'issueTracker': {
                     'url': 'https://github.com/npm/npm/issues'
                 },
                 'author': 'Isaac Z. Schlueter (http://blog.izs.me)',
                 'codeRepository': {
                     'type': 'git',
                     'url': 'https://github.com/npm/npm'
                 },
                 'description': 'a package manager for JavaScript',
                 'softwareSuggestions': {
                     'tacks': '~1.2.6',
                     'tap': '~10.3.2'
                 },
                 'license': 'Artistic-2.0',
                 'version': '5.0.3',
                 'other': {
                     'preferGlobal': True,
                     'config': {
                         'publishtest': False
                     }
                 },
                 'name': 'npm',
                 'keywords': [
                     'install',
                     'modules',
                     'package manager',
                     'package.json'
                 ],
                 'url': 'https://docs.npmjs.com/'
             },
             'id': 'd4c647f0fc257591cc9ba1722484229780d1c607'
         }, {
             'indexer_configuration_id': 30,
             'translated_metadata': None,
             'id': '02fb2c89e14f7fab46701478c83779c7beb7b069'
         }]
         # this assertion sometimes fails: the ordering of nested lists
         # in the results is not deterministic
         self.assertEqual(expected_results, results)

     @istest
     def test_detect_metadata_package_json(self):
         # given
         df = [{
             'sha1_git': b'abc',
             'name': b'index.js',
             'target': b'abc',
             'length': 897,
             'status': 'visible',
             'type': 'file',
             'perms': 33188,
             'dir_id': b'dir_a',
             'sha1': b'bcd'
         }, {
             'sha1_git': b'aab',
             'name': b'package.json',
             'target': b'aab',
             'length': 712,
             'status': 'visible',
             'type': 'file',
             'perms': 33188,
             'dir_id': b'dir_a',
             'sha1': b'cde'
         }]
         # when
         results = detect_metadata(df)

         expected_results = {
             'npm': [
                 b'cde'
             ]
         }
         # then
         self.assertEqual(expected_results, results)

     @istest
     def test_revision_metadata_indexer(self):
         metadata_indexer = TestRevisionMetadataIndexer()

         sha1_gits = [
             b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
         ]
         metadata_indexer.run(sha1_gits, 'update-dups')

         results = metadata_indexer.idx_storage.state

         expected_results = [{
             'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'translated_metadata': {
                 'identifier': None,
                 'maintainer': None,
                 'url': [
                     'https://github.com/librariesio/yarn-parser#readme'
                 ],
                 'codeRepository': [{
                     'type': 'git',
                     'url': 'git+https://github.com/librariesio/yarn-parser.git'
                 }],
                 'author': ['Andrew Nesbitt'],
                 'license': ['AGPL-3.0'],
                 'version': ['1.0.0'],
                 'description': [
                     'Tiny web service for parsing yarn.lock files'
                 ],
                 'relatedLink': None,
                 'developmentStatus': None,
                 'operatingSystem': None,
                 'issueTracker': [{
                     'url': 'https://github.com/librariesio/yarn-parser/issues'
                 }],
                 'softwareRequirements': [{
                     'express': '^4.14.0',
                     'yarn': '^0.21.0',
                     'body-parser': '^1.15.2'
                 }],
                 'name': ['yarn-parser'],
                 'keywords': [['yarn', 'parse', 'lock', 'dependencies']],
                 'email': None
             },
             'indexer_configuration_id': 7
         }]
         # then
         self.assertEqual(expected_results, results)
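
Two reading notes on the tests above. First, test_extract_minimal_metadata_dict
shows the aggregation contract of extract_minimal_metadata_dict: values are
collected per CodeMeta property across all translated files, identical values
are deduplicated (only one codeRepository entry survives), and absent
properties come back as None. A distilled sketch of that merge behaviour,
written against the test data only; it is not the actual swh implementation:

    # Hypothetical restatement of the merge contract demonstrated by
    # test_extract_minimal_metadata_dict; not the swh implementation.
    def merge_minimal_metadata(translated, codemeta_keys):
        merged = {}
        for key in codemeta_keys:
            values = []
            for metadata in translated:
                value = metadata.get(key)
                # keep each distinct value once, in first-seen order
                if value is not None and value not in values:
                    values.append(value)
            merged[key] = values or None  # absent keys map to None
        return merged

With the three metadata_list entries from the test and the CodeMeta keys of
expected_results, this yields e.g. version == ['0.0.1'] and
name == ['test_1', 'test_0_1', 'test_metadata'], matching the expectation.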
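
Second, about the comment flagging the flaky assertion in
test_index_content_metadata_npm: list equality in Python is order-sensitive,
so results aggregated while iterating over an unordered mapping of contexts
can compare unequal across runs even when the same items were collected.
A self-contained illustration, not part of the patch:

    # Order-sensitive equality is what makes the assertion flaky.
    assert ['a', 'b'] != ['b', 'a']
    # Comparing sorted copies is a common order-insensitive check
    # for lists of hashable, orderable items.
    assert sorted(['a', 'b']) == sorted(['b', 'a'])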