diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
index 33999d3..bc37773 100644
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -1,383 +1,383 @@
 # Copyright (C) 2016-2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import abc
 import os
 import logging
 import shutil
 import tempfile

 from swh.core.config import SWHConfig
 from swh.objstorage import get_objstorage
 from swh.objstorage.exc import ObjNotFoundError
 from swh.model import hashutil
 from swh.storage import get_storage
 from swh.scheduler.utils import get_task


 class DiskIndexer:
     """Mixin intended to be used with other *Indexer classes.

     Indexers inheriting from this class are a category of indexers
     that need the disk for their computations.

     Expects:
         self.working_directory variable defined at runtime.

     """
     def __init__(self):
         super().__init__()

     def write_to_temp(self, filename, data):
         """Write the sha1's content in a temporary file.

         Args:
             filename (str): one of sha1's many filenames
             data (bytes): the sha1's content to write in a temporary file

         Returns:
             The path to the temporary file created. That file is
             filled in with the raw content's data.

         """
         os.makedirs(self.working_directory, exist_ok=True)
         temp_dir = tempfile.mkdtemp(dir=self.working_directory)
         content_path = os.path.join(temp_dir, filename)

         with open(content_path, 'wb') as f:
             f.write(data)

         return content_path

     def cleanup(self, content_path):
         """Remove content_path from the working directory.

         Args:
             content_path (str): the file to remove

         """
         temp_dir = os.path.dirname(content_path)
         shutil.rmtree(temp_dir)


 class BaseIndexer(SWHConfig, metaclass=abc.ABCMeta):
     """Base class for indexers to inherit from.

     The main entry point is the `run` function, which is in charge of
     triggering the computations on the batch of ids received.

     Indexers can:

     - filter out ids whose data has already been indexed.
     - retrieve ids' data from storage or objstorage
     - index this data depending on the object and store the result in
       storage.

     To implement a new object type indexer, inherit from the
     BaseIndexer and implement the indexing process:

     - def run(self, object_ids, policy_update): object_ids are
       different depending on the object. For example: sha1 for
       content, sha1_git for revision, directory, release, and id for
       origin

     To implement a new concrete indexer, inherit from the object
     level classes: ContentIndexer, RevisionIndexer (later on,
     OriginIndexer will also be available).

     Then you need to implement the following functions:

     - def filter(self, ids): filter out data already indexed (in
       storage). This function is used by the orchestrator and not
       directly by the indexer
       (cf. swh.indexer.orchestrator.BaseOrchestratorIndexer).

     - def index_object(self, id, data): compute the index on id with
       data (retrieved from storage or objstorage by the id key) and
       return the resulting index computation.

     - def persist_index_computations(self, results, policy_update):
       persist the results of multiple index computations in storage.

     The new indexer implementation can also override the following
     functions:

     - def prepare(self): configuration preparation for the indexer.
       When overriding, this must call the super().prepare() function.

     - def check(self): configuration check for the indexer. When
       overriding, this must call the super().check() function.

     - def retrieve_tools_information(self): this should return a dict
       of the tool(s) to use when indexing or filtering.

     """
     CONFIG = 'indexer/base'

     DEFAULT_CONFIG = {
         'storage': ('dict', {
             'host': 'uffizi',
             'cls': 'remote',
             'args': {'root': '/tmp/softwareheritage/objects',
                      'slicing': '0:2/2:4/4:6'}
         }),
         # queue to reschedule if problem (none for no rescheduling,
         # the default)
         'rescheduling_task': ('str', None),
         'objstorage': ('dict', {
             'cls': 'multiplexer',
             'args': {
                 'objstorages': [{
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure-storage',
                             'args': {
                                 'account_name': '0euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '0'}
                         ]
                     }
                 }, {
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure-storage',
                             'args': {
                                 'account_name': '1euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '1'}
                         ]
                     }
                 }]
             },
         }),
     }

     ADDITIONAL_CONFIG = {}

     def __init__(self):
         """Prepare and check that the indexer is ready to run.

         """
         super().__init__()
         self.prepare()
         self.check()

     def prepare(self):
         """Prepare the indexer's needed runtime configuration.
            Without this step, the indexer cannot possibly run.

         """
         self.config = self.parse_config_file(
             additional_configs=[self.ADDITIONAL_CONFIG])
         objstorage = self.config['objstorage']
         self.objstorage = get_objstorage(objstorage['cls'],
                                          objstorage['args'])
         storage = self.config['storage']
         self.storage = get_storage(storage['cls'], storage['args'])
         rescheduling_task = self.config['rescheduling_task']
         if rescheduling_task:
             self.rescheduling_task = get_task(rescheduling_task)
         else:
             self.rescheduling_task = None
         l = logging.getLogger('requests.packages.urllib3.connectionpool')
         l.setLevel(logging.WARN)
         self.log = logging.getLogger('swh.indexer')
         self.tools = self.retrieve_tools_information()

     def check(self):
         """Check that the indexer's configuration is ok before
            proceeding. If ok, does nothing. If not, raises an error.

         """
         if not self.tools:
             raise ValueError('Tools %s is unknown, cannot continue' %
                              self.config['tools'])

     def retrieve_tools_information(self):
         """Permit to define how to retrieve tool information based on
            configuration.

            Adds a sensible default which can be overridden if not
            sufficient. (For now, all indexers use only one tool.)

         """
         tool = {
             'tool_%s' % key: value
             for key, value in self.config['tools'].items()
         }
         return self.storage.indexer_configuration_get(tool)

     @abc.abstractmethod
     def filter(self, ids):
         """Filter missing ids for that particular indexer.

         Args:
             ids ([bytes]): list of ids

         Yields:
             iterator of missing ids

         """
         pass

     @abc.abstractmethod
     def index(self, id, data):
         """Index computation for the id and associated raw data.

         Args:
             id (bytes): identifier
             data (bytes): id's data from storage or objstorage
                 depending on object type

         Returns:
             a dict that makes sense for the
             persist_index_computations function.

         """
         pass

     @abc.abstractmethod
     def persist_index_computations(self, results, policy_update):
         """Persist the computation resulting from the index.

         Args:
             results ([result]): list of results. One result is the
                 result of the index function.
             policy_update ([str]): either 'update-dups' or
                 'ignore-dups' to respectively update duplicates or
                 ignore them

         Returns:
             None

         """
         pass

     def next_step(self, results):
         """Do something else with the computation results (e.g. send
            to another queue, ...).

            (This is not an abstractmethod since it is optional.)

         Args:
             results ([result]): list of results (dict) as returned by
                 the index function.

         Returns:
             None

         """
         pass

     @abc.abstractmethod
     def run(self, ids, policy_update):
         """Given a list of ids:

         - retrieve the data from storage
         - execute the indexing computations
         - store the results (according to policy_update)

         Args:
             ids ([bytes]): id's identifier list
             policy_update ([str]): either 'update-dups' or
                 'ignore-dups' to respectively update duplicates or
                 ignore them

         """
         pass


 class ContentIndexer(BaseIndexer):
     """An object type indexer; inherits from BaseIndexer and
     implements the indexing process for contents through its run
     method.

     Note: ContentIndexer is not instantiable as-is. To use it in
     another context, one should inherit from this class and override
     the methods mentioned in the BaseIndexer class.

     """
     def run(self, sha1s, policy_update):
         """Given a list of sha1s:

         - retrieve the content from storage
         - execute the indexing computations
         - store the results (according to policy_update)

         Args:
             sha1s ([bytes]): sha1's identifier list
             policy_update ([str]): either 'update-dups' or
                 'ignore-dups' to respectively update duplicates or
                 ignore them

         """
         results = []
         try:
             for sha1 in sha1s:
                 try:
                     raw_content = self.objstorage.get(sha1)
                 except ObjNotFoundError:
                     self.log.warn('Content %s not found in objstorage' %
                                   hashutil.hash_to_hex(sha1))
                     continue
                 res = self.index(sha1, raw_content)
                 if res:  # if no results, skip it
                     results.append(res)

             self.persist_index_computations(results, policy_update)
             self.next_step(results)
         except Exception:
             self.log.exception(
                 'Problem when reading contents metadata.')
             if self.rescheduling_task:
                 self.log.warn('Rescheduling batch')
                 self.rescheduling_task.delay(sha1s, policy_update)


 class RevisionIndexer(BaseIndexer):
     """An object type indexer; inherits from BaseIndexer and
     implements the indexing process for revisions through its run
     method.

     Note: RevisionIndexer is not instantiable as-is. To use it in
     another context, one should inherit from this class and override
     the methods mentioned in the BaseIndexer class.

     """
     def run(self, sha1_gits, policy_update):
         """Given a list of sha1_gits:

         - retrieve the revisions from storage
         - execute the indexing computations
         - store the results (according to policy_update)

         Args:
             sha1_gits ([bytes]): sha1_git's identifier list
             policy_update ([str]): either 'update-dups' or
                 'ignore-dups' to respectively update duplicates or
                 ignore them

         """
         results = []
         revs = self.storage.revision_get(sha1_gits)

         for rev in revs:
             if not rev:
-                self.log.warn('Revision %s not found in storage' %
-                              hashutil.hash_to_hex(sha1_gits))
+                self.log.warn('Revisions %s not found in storage' %
+                              list(map(hashutil.hash_to_hex, sha1_gits)))
                 continue
             try:
                 res = self.index(rev)
                 if res:  # if no results, skip it
                     results.append(res)
             except Exception:
                 self.log.exception(
                     'Problem when processing revision')
         self.persist_index_computations(results, policy_update)
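For illustration (not part of this patch): the contract that `BaseIndexer` describes above boils down to three methods on a concrete subclass, with `run()` inherited from the object-level class. A minimal sketch of a `ContentIndexer` subclass; the class name and the stubbed persistence step are made up for this example:

```python
from swh.indexer.indexer import ContentIndexer


class ContentLengthIndexer(ContentIndexer):
    """Toy indexer recording each content's length (hypothetical)."""

    def filter(self, ids):
        # A real indexer asks storage which ids are not yet indexed;
        # this sketch assumes everything is missing.
        yield from ids

    def index(self, id, data):
        # The returned dict must have the shape that
        # persist_index_computations expects.
        return {'id': id, 'length': len(data)}

    def persist_index_computations(self, results, policy_update):
        # A real implementation calls the matching storage endpoint,
        # e.g. some content_*_add(results, conflict_update=...) method.
        pass
```

Note that instantiating such a class still requires a valid indexer configuration, since `BaseIndexer.__init__` runs `prepare()` and `check()`.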
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
index bf59e57..28a445c 100644
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -1,275 +1,280 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import click

 from swh.indexer.indexer import ContentIndexer, RevisionIndexer
 from swh.indexer.metadata_dictionary import compute_metadata
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.model import hashutil


 class ContentMetadataIndexer(ContentIndexer):
     """Indexer at content level in charge of:

     - filtering out content already indexed in content_metadata
     - reading content from objstorage with the content's id sha1
     - computing translated_metadata for a given context
     - using metadata_dictionary as the 'swh-metadata-translator' tool
     - storing the result in the content_metadata table

     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'

     def __init__(self, tool, config):
         self.tool = tool
         # roundabout way to reuse the exact same config as the
         # RevisionMetadataIndexer object that internally uses
         # ContentMetadataIndexer
         self.new_config = config
         super().__init__()

     def prepare(self):
         super().prepare()
         self.results = []
         if self.new_config['storage']:
             self.storage = self.new_config['storage']
         if self.new_config['objstorage']:
             self.objstorage = self.new_config['objstorage']

     def retrieve_tools_information(self):
         self.config['tools'] = self.tool
         return super().retrieve_tools_information()

     def filter(self, sha1s):
         """Filter out known sha1s and return only missing ones.

         """
         yield from self.storage.content_metadata_missing((
             {
                 'id': sha1,
                 'indexer_configuration_id': self.tools['id'],
             } for sha1 in sha1s
         ))

     def index(self, sha1, raw_content):
         """Index sha1's content and store the result.

         Args:
             sha1 (bytes): content's identifier
             raw_content (bytes): raw content in bytes

         Returns:
             result (dict): representing a content_metadata. If the
             translation wasn't successful, the translated_metadata
             key is kept as None.

         """
         result = {
             'id': sha1,
             'indexer_configuration_id': self.tools['id'],
             'translated_metadata': None
         }
         try:
             context = self.tools['tool_configuration']['context']
             result['translated_metadata'] = compute_metadata(
                 context, raw_content)
             # a roundabout way to keep the result with the indexer
             # object for get_results
             self.results.append(result)
         except Exception:
             self.log.exception(
                 "Problem during tool retrieval of metadata translation")
         return result

     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.

         Args:
             results ([dict]): list of content_metadata, dicts with
                 the following keys:

                 - id (bytes): content's identifier (sha1)
                 - translated_metadata (jsonb): detected metadata

             policy_update ([str]): either 'update-dups' or
                 'ignore-dups' to respectively update duplicates or
                 ignore them

         """
         self.storage.content_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))

     def get_results(self):
         """Can be called only if the run method was called before.

         Returns:
             results (list): list of content_metadata entries
             calculated by the current indexer

         """
         return self.results


 class RevisionMetadataIndexer(RevisionIndexer):
     """Indexer at revision level in charge of:

     - filtering revisions already indexed in the revision_metadata
       table with the defined computation tool
     - retrieving all entry_files in the root directory
     - using metadata_detector for file_names containing metadata
     - computing the metadata translation if necessary and possible
       (depends on tool)
     - sending sha1s to content indexing if possible
     - storing the results for the revision

     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'

     ADDITIONAL_CONFIG = {
         'tools': ('dict', {
             'name': 'swh-metadata-detector',
             'version': '0.0.1',
             'configuration': {
                 'type': 'local',
                 'context': ['npm', 'codemeta']
             },
         }),
     }

     def prepare(self):
         super().prepare()

     def filter(self, sha1_gits):
         """Filter out known sha1s and return only missing ones.

         """
         yield from self.storage.revision_metadata_missing((
             {
                 'id': sha1_git,
                 'indexer_configuration_id': self.tools['id'],
             } for sha1_git in sha1_gits
         ))

     def index(self, rev):
         """Index rev by processing it and organizing the result.

         Use metadata_detector to iterate on filenames:

         - if one filename is detected -> send the file to the content
           indexer
         - if multiple files are detected -> translation is needed at
           revision level

         Args:
             rev (bytes): revision artifact from storage

         Returns:
             A dict, representing a revision_metadata, with keys:

             - id (bytes): rev's identifier (sha1_git)
             - indexer_configuration_id (bytes): tool used
             - translated_metadata (bytes): dict of retrieved metadata

         """
         try:
             result = {
                 'id': rev['id'],
                 'indexer_configuration_id': self.tools['id'],
                 'translated_metadata': None
             }

             root_dir = rev['directory']
             dir_ls = self.storage.directory_ls(root_dir, recursive=False)
             files = (entry for entry in dir_ls if entry['type'] == 'file')
             detected_files = detect_metadata(files)
             result['translated_metadata'] = self.translate_revision_metadata(
                 detected_files)
         except Exception:
             self.log.exception(
                 'Problem when indexing rev')
         return result

     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.

         Args:
             results ([dict]): list of revision_metadata, dicts with
                 the following keys:

                 - id (bytes): revision's identifier (sha1_git)
                 - translated_metadata (jsonb): detected metadata

             policy_update ([str]): either 'update-dups' or
                 'ignore-dups' to respectively update duplicates or
                 ignore them

         """
         # TODO: add functions in storage to keep data in
         # revision_metadata
         self.storage.revision_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))

     def translate_revision_metadata(self, detected_files):
         """Determine the plan of action for translating metadata,
         given one or multiple detected files:

         Args:
             detected_files (dict): dict mapping context names to
                 lists of sha1s
                 (e.g. {'npm': [sha1_1, sha1_2], 'authors': [sha1_3]})

         Returns:
             translated_metadata (dict): dict with the CodeMeta
             vocabulary

         """
         translated_metadata = []
         tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.1',
             'configuration': {
                 'type': 'local',
                 'context': None
             },
         }
         # TODO: iterate on each context, on each file
         # -> get raw_contents
         # -> translate each content
         config = {
             'storage': self.storage,
             'objstorage': self.objstorage
         }
         for context in detected_files.keys():
             tool['configuration']['context'] = context
             c_metadata_indexer = ContentMetadataIndexer(tool, config)
-            # sha1s that aren't in the content_metadata table
-            sha1s_filtered = list(c_metadata_indexer.filter(
-                detected_files[context]))
+            # sha1s that are in the content_metadata table
+            sha1s_in_storage = []
+            metadata_generator = self.storage.content_metadata_get(
+                detected_files[context])
+            for c in metadata_generator:
+                # extracting translated_metadata
+                sha1 = c['id']
+                sha1s_in_storage.append(sha1)
+                local_metadata = c['translated_metadata']
+                # local metadata is aggregated
+                if local_metadata:
+                    translated_metadata.append(local_metadata)
+
+            sha1s_filtered = [item for item in detected_files[context]
+                              if item not in sha1s_in_storage]
+
             if sha1s_filtered:
-                print(sha1s_filtered)
                 # schedule indexation of content
                 try:
                     c_metadata_indexer.run(sha1s_filtered,
                                            policy_update='ignore-dups')
                     # on the fly possibility:
-                    local_metadata = c_metadata_indexer.get_results()
+                    results = c_metadata_indexer.get_results()
+
+                    for result in results:
+                        local_metadata = result['translated_metadata']
+                        translated_metadata.append(local_metadata)
+
                 except Exception as e:
                     self.log.warn('Exception while indexing content: %s', e)
-            sha1s_in_storage = [item for item in detected_files[context]
-                                if item not in sha1s_filtered]
-            # fetch from storage results that were skipped with filter
-            for sha1 in sha1s_in_storage:
-                local_metadata = {}
-                # fetch content_metadata from storage
-                metadata_generator = self.storage.content_metadata_get([sha1])
-                for c in metadata_generator:
-                    # extracting translated_metadata
-                    local_metadata = c['translated_metadata']
-                # local metadata is aggregated
-                if local_metadata:
-                    translated_metadata.append(local_metadata)
+
         # transform translated_metadata into a minimal set with
         # swh-metadata-detector
         min_metadata = extract_minimal_metadata_dict(translated_metadata)
         return min_metadata


 @click.command()
-@click.option('--revs_ids',
+@click.option('--revs', '-i',
               default=['8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
                        '026040ea79dec1b49b4e3e7beda9132b6b26b51b',
                        '9699072e21eded4be8d45e3b8d543952533fa190'],
-              help='Default sha1_git to lookup')
-def main(revs_ids):
-    _git_sha1s = list(map(hashutil.hash_to_bytes, revs_ids))
+              help='Default sha1_git to lookup', multiple=True)
+def main(revs):
+    _git_sha1s = list(map(hashutil.hash_to_bytes, revs))
     rev_metadata_indexer = RevisionMetadataIndexer()
     rev_metadata_indexer.run(_git_sha1s, 'update-dups')


 if __name__ == '__main__':
     import logging
     logging.basicConfig(level=logging.INFO)
     main()
diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py
index ceb4a53..78599e0 100644
--- a/swh/indexer/metadata_detector.py
+++ b/swh/indexer/metadata_detector.py
@@ -1,72 +1,73 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information


 mapping_filenames = {
     b"package.json": "npm",
     b"codemeta.json": "codemeta"
 }


 def detect_metadata(files):
     """Detect files potentially containing metadata.

     Args:
         files (list): list of file entries

     Returns:
         - empty dict if nothing was found
         - dictionary {mapping_filenames[name]: [f['sha1']]}

     """
     results = {}
     for f in files:
         name = f['name'].lower().strip()
         # TODO: possibility to detect extensions
         if name in mapping_filenames:
             tool = mapping_filenames[name]
             if tool in results:
                 results[tool].append(f['sha1'])
             else:
                 results[tool] = [f['sha1']]
     return results


 def extract_minimal_metadata_dict(metadata_list):
     """Every item in the metadata_list is a dict of translated_metadata
     in the CodeMeta vocabulary. We wish to extract a minimal set of
     terms and keep all values corresponding to each term.

     Args:
         metadata_list (list): list of dicts of translated_metadata

     Returns:
         minimal_dict (dict): one dict with selected values of metadata

     """
     minimal_dict = {
         "developmentStatus": [],
         "version": [],
         "operatingSystem": [],
         "description": [],
         "keywords": [],
         "issueTracker": [],
         "name": [],
         "author": [],
         "relatedLink": [],
         "url": [],
         "type": [],
         "license": [],
         "maintainer": [],
         "email": [],
         "softwareRequirements": [],
-        "identifier": []
+        "identifier": [],
+        "codeRepository": []
     }
     for term in minimal_dict.keys():
         for metadata_dict in metadata_list:
             if term in metadata_dict:
                 minimal_dict[term].append(metadata_dict[term])
         if not minimal_dict[term]:
             minimal_dict[term] = None
     return minimal_dict
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index 44bdfeb..de17b61 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,291 +1,295 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import unittest
 import logging

 from nose.tools import istest

 from swh.indexer.metadata_dictionary import compute_metadata
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata import ContentMetadataIndexer
 from swh.indexer.metadata import RevisionMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage
 from swh.indexer.tests.test_utils import MockStorage


 class TestContentMetadataIndexer(ContentMetadataIndexer):
     """Specific metadata indexer whose configuration is enough to
     satisfy the indexing tests.

     """
     def prepare(self):
         self.config = {
             'rescheduling_task': None,
         }
         self.storage = MockStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.task_destination = None
         self.rescheduling_task = self.config['rescheduling_task']
         self.tools = self.retrieve_tools_information()
         self.results = []


 class TestRevisionMetadataIndexer(RevisionMetadataIndexer):
     """Specific indexer whose configuration is enough to satisfy the
     indexing tests.

     """
     def prepare(self):
         self.config = {
             'rescheduling_task': None,
             'tools': {
                 'name': 'swh-metadata-detector',
                 'version': '0.0.1',
                 'configuration': {
                     'type': 'local',
                     'context': 'npm'
                 }
             }
         }
         self.storage = MockStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.task_destination = None
         self.rescheduling_task = self.config['rescheduling_task']
         self.tools = self.retrieve_tools_information()
         self.results = []


 class Metadata(unittest.TestCase):
     """Tests the metadata_mock_tool tool for metadata detection.

     """
     def setUp(self):
         """Show the entire diff in the test results.

         """
         self.maxDiff = None
         self.content_tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.1',
             'configuration': {
                 'type': 'local',
                 'context': 'npm'
             }
         }

     @istest
     def test_compute_metadata_none(self):
         """Testing that empty content returns None.

         """
         # given
         content = b""
         context = "npm"

         # None if no metadata was found or an error occurred
         declared_metadata = None

         # when
         result = compute_metadata(context, content)

         # then
         self.assertEqual(declared_metadata, result)

     @istest
     def test_compute_metadata_npm(self):
         """Testing only the computation of metadata with
         hard_mapping_npm.

         """
         # given
         content = b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.1",
                 "description": "Simple package.json test for indexer",
                 "repository": {
                     "type": "git",
                     "url": "https://github.com/moranegg/metadata_test"
                 }
             }
         """
         declared_metadata = {
             'name': 'test_metadata',
             'version': '0.0.1',
             'description': 'Simple package.json test for indexer',
             'codeRepository': {
                 'type': 'git',
                 'url': 'https://github.com/moranegg/metadata_test'
             },
             'other': {}
         }

         # when
         result = compute_metadata("npm", content)

         # then
         self.assertEqual(declared_metadata, result)

     @istest
     def test_index_content_metadata_npm(self):
         """Testing NPM with package.json.

         - one sha1 uses a file that can't be translated to metadata
           and should return None in the translated metadata

         """
         # given
         sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5',
                  'd4c647f0fc257591cc9ba1722484229780d1c607',
                  '02fb2c89e14f7fab46701478c83779c7beb7b069']

         # this metadata indexer computes only metadata for package.json
         # in npm context with a hard mapping
         metadata_indexer = TestContentMetadataIndexer(self.content_tool, None)

         # when
         metadata_indexer.run(sha1s, policy_update='ignore-dups')
         results = metadata_indexer.storage.state

         expected_results = [{
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 'other': {},
                 'codeRepository': {
                     'type': 'git',
                     'url': 'https://github.com/moranegg/metadata_test'
                 },
                 'description': 'Simple package.json test for indexer',
                 'name': 'test_metadata',
                 'version': '0.0.1'
             },
             'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'
         }, {
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 'softwareRequirements': {
                     'JSONStream': '~1.3.1',
                     'abbrev': '~1.1.0',
                     'ansi-regex': '~2.1.1',
                     'ansicolors': '~0.3.2',
                     'ansistyles': '~0.1.3'
                 },
                 'issueTracker': {
                     'url': 'https://github.com/npm/npm/issues'
                 },
                 'author': 'Isaac Z. Schlueter (http://blog.izs.me)',
                 'codeRepository': {
                     'type': 'git',
                     'url': 'https://github.com/npm/npm'
                 },
                 'description': 'a package manager for JavaScript',
                 'softwareSuggestions': {
                     'tacks': '~1.2.6',
                     'tap': '~10.3.2'
                 },
                 'license': 'Artistic-2.0',
                 'version': '5.0.3',
                 'other': {
                     'preferGlobal': True,
                     'config': {
                         'publishtest': False
                     }
                 },
                 'name': 'npm',
                 'keywords': [
                     'install',
                     'modules',
                     'package manager',
                     'package.json'
                 ],
                 'url': 'https://docs.npmjs.com/'
             },
             'id': 'd4c647f0fc257591cc9ba1722484229780d1c607'
         }, {
             'indexer_configuration_id': 30,
             'translated_metadata': None,
             'id': '02fb2c89e14f7fab46701478c83779c7beb7b069'
         }]

         # The assertion below sometimes fails because of nested lists
         self.assertEqual(expected_results, results)

     @istest
     def test_detect_metadata_package_json(self):
         # given
         df = [{
             'sha1_git': b'abc',
             'name': b'index.js',
             'target': b'abc',
             'length': 897,
             'status': 'visible',
             'type': 'file',
             'perms': 33188,
             'dir_id': b'dir_a',
             'sha1': b'bcd'
         }, {
             'sha1_git': b'aab',
             'name': b'package.json',
             'target': b'aab',
             'length': 712,
             'status': 'visible',
             'type': 'file',
             'perms': 33188,
             'dir_id': b'dir_a',
             'sha1': b'cde'
         }]

         # when
         results = detect_metadata(df)

         expected_results = {
             'npm': [
                 b'cde'
             ]
         }
         # then
         self.assertEqual(expected_results, results)

     @istest
     def test_revision_metadata_indexer(self):
         metadata_indexer = TestRevisionMetadataIndexer()

         sha1_gits = [
             b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
         ]
         metadata_indexer.run(sha1_gits, 'update-dups')

         results = metadata_indexer.storage.state

         expected_results = [{
             'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'translated_metadata': {
                 'identifier': None,
                 'maintainer': None,
                 'url': [
                     'https://github.com/librariesio/yarn-parser#readme'
                 ],
+                'codeRepository': [{
+                    'type': 'git',
+                    'url': 'git+https://github.com/librariesio/yarn-parser.git'
+                }],
                 'author': ['Andrew Nesbitt'],
                 'license': ['AGPL-3.0'],
                 'version': ['1.0.0'],
                 'description': [
                     'Tiny web service for parsing yarn.lock files'
                 ],
                 'relatedLink': None,
                 'developmentStatus': None,
                 'operatingSystem': None,
                 'issueTracker': [{
                     'url': 'https://github.com/librariesio/yarn-parser/issues'
                 }],
                 'softwareRequirements': [{
                     'express': '^4.14.0',
                     'yarn': '^0.21.0',
                     'body-parser': '^1.15.2'
                 }],
                 'name': ['yarn-parser'],
                 'keywords': [['yarn', 'parse', 'lock', 'dependencies']],
                 'type': None,
                 'email': None
             },
             'indexer_configuration_id': 7
         }]
         # then
         self.assertEqual(expected_results, results)
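Since `--revs` is now declared with `multiple=True`, the CLI collects repeated `-i` flags into a tuple rather than taking a single list value. A sketch of the invocation shape using click's test runner (not part of the patch; it assumes a working indexer configuration is in place, since `main` instantiates `RevisionMetadataIndexer`):

```python
from click.testing import CliRunner

from swh.indexer.metadata import main

runner = CliRunner()
# Each -i adds one sha1_git hex string to the `revs` tuple.
result = runner.invoke(main, [
    '-i', '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
    '-i', '026040ea79dec1b49b4e3e7beda9132b6b26b51b',
])
```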