diff --git a/swh/indexer/cli.py b/swh/indexer/cli.py
index cb82793..56c7f88 100644
--- a/swh/indexer/cli.py
+++ b/swh/indexer/cli.py
@@ -1,186 +1,208 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import click
 
 from swh.core import config
 from swh.scheduler import get_scheduler
 from swh.scheduler.utils import create_task_dict
 from swh.storage import get_storage
 
-from swh.indexer.metadata_dictionary import MAPPINGS
+from swh.indexer import metadata_dictionary
 from swh.indexer.storage import get_indexer_storage
 from swh.indexer.storage.api.server import load_and_check_config, app
 
 
 CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
 
 TASK_BATCH_SIZE = 1000  # Number of tasks per query to the scheduler
 
 
 @click.group(context_settings=CONTEXT_SETTINGS)
 @click.option('--config-file', '-C', default=None,
               type=click.Path(exists=True, dir_okay=False,),
               help="Configuration file.")
 @click.pass_context
 def cli(ctx, config_file):
     """Software Heritage Indexer CLI interface
     """
     ctx.ensure_object(dict)
 
     conf = config.read(config_file)
     ctx.obj['config'] = conf
 
 
 def _get_api(getter, config, config_key, url):
     if url:
         config[config_key] = {
             'cls': 'remote',
             'args': {'url': url}
         }
     elif config_key not in config:
         raise click.ClickException(
             'Missing configuration for {}'.format(config_key))
     return getter(**config[config_key])
 
 
 @cli.group('mapping')
 def mapping():
     pass
 
 
 @mapping.command('list')
 def mapping_list():
     """Prints the list of known mappings."""
-    mapping_names = [mapping.name for mapping in MAPPINGS.values()]
+    mapping_names = [mapping.name
+                     for mapping in metadata_dictionary.MAPPINGS.values()]
     mapping_names.sort()
     for mapping_name in mapping_names:
         click.echo(mapping_name)
 
 
+@mapping.command('list-terms')
+@click.option('--exclude-mapping', multiple=True,
+              help='Exclude the given mapping from the output')
+@click.option('--concise', is_flag=True,
+              default=False,
+              help='Don\'t print the list of mappings supporting each term.')
+def mapping_list_terms(concise, exclude_mapping):
+    """Prints the list of known CodeMeta terms, and which mappings
+    support them."""
+    properties = metadata_dictionary.list_terms()
+    for (property_name, supported_mappings) in sorted(properties.items()):
+        supported_mappings = {m.name for m in supported_mappings}
+        supported_mappings -= set(exclude_mapping)
+        if supported_mappings:
+            if concise:
+                click.echo(property_name)
+            else:
+                click.echo('{}:'.format(property_name))
+                click.echo('\t' + ', '.join(sorted(supported_mappings)))
+
+
 @cli.group('schedule')
 @click.option('--scheduler-url', '-s', default=None,
               help="URL of the scheduler API")
 @click.option('--indexer-storage-url', '-i', default=None,
               help="URL of the indexer storage API")
 @click.option('--storage-url', '-g', default=None,
               help="URL of the (graph) storage API")
 @click.option('--dry-run/--no-dry-run', is_flag=True,
               default=False,
               help='Default to list only what would be scheduled.')
 @click.pass_context
 def schedule(ctx, scheduler_url, storage_url, indexer_storage_url,
              dry_run):
     """Manipulate indexer tasks via SWH Scheduler's API."""
     ctx.obj['indexer_storage'] = _get_api(
         get_indexer_storage,
         ctx.obj['config'],
         'indexer_storage',
         indexer_storage_url
     )
     ctx.obj['storage'] = _get_api(
         get_storage,
         ctx.obj['config'],
         'storage',
         storage_url
     )
     ctx.obj['scheduler'] = _get_api(
         get_scheduler,
         ctx.obj['config'],
         'scheduler',
         scheduler_url
     )
     if dry_run:
         ctx.obj['scheduler'] = None
 
 
 def list_origins_by_producer(idx_storage, mappings, tool_ids):
     start = 0
     limit = 10000
     while True:
         origins = list(
             idx_storage.origin_intrinsic_metadata_search_by_producer(
                 start=start, limit=limit, ids_only=True,
                 mappings=mappings or None, tool_ids=tool_ids or None))
         if not origins:
             break
         start = origins[-1]+1
         yield from origins
 
 
 @schedule.command('reindex_origin_metadata')
 @click.option('--batch-size', '-b', 'origin_batch_size',
               default=10, show_default=True, type=int,
               help="Number of origins per task")
 @click.option('--tool-id', '-t', 'tool_ids', type=int, multiple=True,
               help="Restrict search of old metadata to this/these tool ids.")
 @click.option('--mapping', '-m', 'mappings', multiple=True,
               help="Mapping(s) that should be re-scheduled (eg. 'npm', "
                    "'gemspec', 'maven')")
 @click.option('--task-type',
               default='indexer_origin_metadata', show_default=True,
               help="Name of the task type to schedule.")
 @click.pass_context
 def schedule_origin_metadata_reindex(
         ctx, origin_batch_size, mappings, tool_ids, task_type):
     """Schedules indexing tasks for origins that were already indexed."""
     idx_storage = ctx.obj['indexer_storage']
     scheduler = ctx.obj['scheduler']
 
     origins = list_origins_by_producer(idx_storage, mappings, tool_ids)
     kwargs = {"policy_update": "update-dups", "parse_ids": False}
     nb_origins = 0
     nb_tasks = 0
 
     while True:
         task_batch = []
         for _ in range(TASK_BATCH_SIZE):
             # Group origins
             origin_batch = []
             for (_, origin) in zip(range(origin_batch_size), origins):
                 origin_batch.append(origin)
             nb_origins += len(origin_batch)
             if not origin_batch:
                 break
 
             # Create a task for these origins
             args = [origin_batch]
             task_dict = create_task_dict(task_type, 'oneshot', *args, **kwargs)
             task_batch.append(task_dict)
 
         # Schedule a batch of tasks
         if not task_batch:
             break
         nb_tasks += len(task_batch)
         if scheduler:
             scheduler.create_tasks(task_batch)
         click.echo('Scheduled %d tasks (%d origins).' % (nb_tasks, nb_origins))
 
     # Print final status.
     if nb_tasks:
         click.echo('Done.')
     else:
         click.echo('Nothing to do (no origin metadata matched the criteria).')
 
 
 @cli.command('api-server')
 @click.argument('config-path', required=1)
 @click.option('--host', default='0.0.0.0', help="Host to run the server")
 @click.option('--port', default=5007, type=click.INT,
               help="Binding port of the server")
 @click.option('--debug/--nodebug', default=True,
               help="Indicates if the server should run in debug mode")
 def api_server(config_path, host, port, debug):
     api_cfg = load_and_check_config(config_path, type='any')
     app.config.update(api_cfg)
     app.run(host, port=int(port), debug=bool(debug))
 
 
 def main():
     return cli(auto_envvar_prefix='SWH_INDEXER')
 
 
 if __name__ == '__main__':
     main()
diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
index b670b87..7cc316f 100644
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -1,127 +1,129 @@
 # Copyright (C) 2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import csv
 import json
 import os.path
 import re
 
 import swh.indexer
 from pyld import jsonld
 
 _DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data')
 
 CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv')
 
 CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, 'codemeta', 'codemeta.jsonld')
 
 
 with open(CODEMETA_CONTEXT_PATH) as fd:
     CODEMETA_CONTEXT = json.load(fd)
 
 CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0'
 CODEMETA_ALTERNATE_CONTEXT_URLS = {
     ('https://raw.githubusercontent.com/codemeta/codemeta/'
      'master/codemeta.jsonld')
 }
 CODEMETA_URI = 'https://codemeta.github.io/terms/'
 SCHEMA_URI = 'http://schema.org/'
 
 
 PROPERTY_BLACKLIST = {
     # CodeMeta properties that we cannot properly represent.
     SCHEMA_URI + 'softwareRequirements',
     CODEMETA_URI + 'softwareSuggestions',
 
     # Duplicate of 'author'
     SCHEMA_URI + 'creator',
     }
 
 _codemeta_field_separator = re.compile(r'\s*[,/]\s*')
 
 
 def make_absolute_uri(local_name):
     definition = CODEMETA_CONTEXT['@context'][local_name]
     if isinstance(definition, str):
         return definition
     elif isinstance(definition, dict):
         prefixed_name = definition['@id']
         (prefix, local_name) = prefixed_name.split(':')
         if prefix == 'schema':
             canonical_name = SCHEMA_URI + local_name
         elif prefix == 'codemeta':
             canonical_name = CODEMETA_URI + local_name
         else:
             assert False, prefix
         return canonical_name
     else:
         assert False, definition
 
 
 def _read_crosstable(fd):
     reader = csv.reader(fd)
     try:
         header = next(reader)
     except StopIteration:
         raise ValueError('empty file')
 
     data_sources = set(header) - {'Parent Type', 'Property',
                                   'Type', 'Description'}
     assert 'codemeta-V1' in data_sources
 
     codemeta_translation = {data_source: {} for data_source in data_sources}
+    terms = set()
 
     for line in reader:  # For each canonical name
         local_name = dict(zip(header, line))['Property']
         if not local_name:
             continue
         canonical_name = make_absolute_uri(local_name)
         if canonical_name in PROPERTY_BLACKLIST:
             continue
+        terms.add(canonical_name)
         for (col, value) in zip(header, line):  # For each cell in the row
             if col in data_sources:
                 # If that's not the parentType/property/type/description
                 for local_name in _codemeta_field_separator.split(value):
                     # For each of the data source's properties that maps
                     # to this canonical name
                     if local_name.strip():
                         codemeta_translation[col][local_name.strip()] = \
                                 canonical_name
 
-    return (header, codemeta_translation)
+    return (terms, codemeta_translation)
 
 
 with open(CROSSWALK_TABLE_PATH) as fd:
-    (CODEMETA_KEYS, CROSSWALK_TABLE) = _read_crosstable(fd)
+    (CODEMETA_TERMS, CROSSWALK_TABLE) = _read_crosstable(fd)
 
 
 def _document_loader(url):
     """Document loader for pyld.
 
     Reads the local codemeta.jsonld file instead of fetching it
     from the Internet every single time."""
     if url == CODEMETA_CONTEXT_URL or url in CODEMETA_ALTERNATE_CONTEXT_URLS:
         return {
                 'contextUrl': None,
                 'documentUrl': url,
                 'document': CODEMETA_CONTEXT,
                 }
     elif url == CODEMETA_URI:
         raise Exception('{} is CodeMeta\'s URI, use {} as context url'.format(
             CODEMETA_URI, CODEMETA_CONTEXT_URL))
     else:
         raise Exception(url)
 
 
 def compact(doc):
     """Same as `pyld.jsonld.compact`, but in the context of CodeMeta."""
     return jsonld.compact(doc, CODEMETA_CONTEXT_URL,
                           options={'documentLoader': _document_loader})
 
 
 def expand(doc):
     """Same as `pyld.jsonld.expand`, but in the context of CodeMeta."""
     return jsonld.expand(doc,
                          options={'documentLoader': _document_loader})
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
index b579ab2..47fb559 100644
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -1,702 +1,729 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 import re
 import abc
 import ast
 import json
 import logging
 import itertools
+import collections
 import email.parser
-import xml.parsers.expat
 import email.policy
+import xml.parsers.expat
 
 import click
 import xmltodict
 
-from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI, CODEMETA_TERMS
 from swh.indexer.codemeta import compact, expand
 
 
 MAPPINGS = {}
 
 
 def register_mapping(cls):
     MAPPINGS[cls.__name__] = cls
     return cls
 
 
+def list_terms():
+    """Returns a dictionary with all supported CodeMeta terms as keys,
+    and the mappings that support each of them as values."""
+    d = collections.defaultdict(set)
+    for mapping in MAPPINGS.values():
+        for term in mapping.supported_terms():
+            d[term].add(mapping)
+    return d
+
+
 def merge_values(v1, v2):
     """If v1 and v2 are of the form `{"@list": l1}` and `{"@list": l2}`,
     returns `{"@list": l1 + l2}`.
     Otherwise, make them lists (if they are not already) and concatenate
     them.
 
     >>> merge_values('a', 'b')
     ['a', 'b']
     >>> merge_values(['a', 'b'], 'c')
     ['a', 'b', 'c']
     >>> merge_values({'@list': ['a', 'b']}, {'@list': ['c']})
     {'@list': ['a', 'b', 'c']}
     """
     if v1 is None:
         return v2
     elif v2 is None:
         return v1
     elif isinstance(v1, dict) and set(v1) == {'@list'}:
         assert isinstance(v1['@list'], list)
         if isinstance(v2, dict) and set(v2) == {'@list'}:
             assert isinstance(v2['@list'], list)
             return {'@list': v1['@list'] + v2['@list']}
         else:
             raise ValueError('Cannot merge %r and %r' % (v1, v2))
     else:
         if isinstance(v2, dict) and '@list' in v2:
             raise ValueError('Cannot merge %r and %r' % (v1, v2))
         if not isinstance(v1, list):
             v1 = [v1]
         if not isinstance(v2, list):
             v2 = [v2]
         return v1 + v2
 
 
 class BaseMapping(metaclass=abc.ABCMeta):
     """Base class for mappings to inherit from
 
     To implement a new mapping:
 
     - inherit this class
     - override translate function
     """
     def __init__(self, log_suffix=''):
         self.log_suffix = log_suffix
         self.log = logging.getLogger('%s.%s' % (
             self.__class__.__module__,
             self.__class__.__name__))
 
     @property
     @abc.abstractmethod
     def name(self):
         """A name of this mapping, used as an identifier in the
         indexer storage."""
         pass
 
     @classmethod
     @abc.abstractmethod
     def detect_metadata_files(cls, files):
         """
         Detects files potentially containing metadata
 
         Args:
             file_entries (list): list of files
 
         Returns:
             list: list of sha1 (possibly empty)
         """
         pass
 
     @abc.abstractmethod
     def translate(self, file_content):
         pass
 
     def normalize_translation(self, metadata):
         return compact(metadata)
 
 
 class SingleFileMapping(BaseMapping):
     """Base class for all mappings that use a single file as input."""
 
     @property
     @abc.abstractmethod
     def filename(self):
         """The .json file to extract metadata from."""
         pass
 
     @classmethod
     def detect_metadata_files(cls, file_entries):
         for entry in file_entries:
             if entry['name'] == cls.filename:
                 return [entry['sha1']]
         return []
 
 
 class DictMapping(BaseMapping):
     """Base class for mappings that take as input a file that is mostly
     a key-value store (eg. a shallow JSON dict)."""
 
     string_fields = []
     '''List of fields that are simple strings, and don't need any
     normalization.'''
 
     @property
     @abc.abstractmethod
     def mapping(self):
         """A translation dict to map dict keys into a canonical name."""
         pass
 
+    @staticmethod
+    def _normalize_method_name(name):
+        return name.replace('-', '_')
+
+    @classmethod
+    def supported_terms(cls):
+        return {
+            term for (key, term) in cls.mapping.items()
+            if key in cls.string_fields
+            or hasattr(cls, 'translate_' + cls._normalize_method_name(key))
+            or hasattr(cls, 'normalize_' + cls._normalize_method_name(key))}
+
     def _translate_dict(self, content_dict, *, normalize=True):
         """
         Translates content  by parsing content from a dict object
         and translating with the appropriate mapping
 
         Args:
             content_dict (dict): content dict to translate
 
         Returns:
             dict: translated metadata in json-friendly form needed for
             the indexer
 
         """
         translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'}
         for k, v in content_dict.items():
             # First, check if there is a specific translation
             # method for this key
             translation_method = getattr(
-                self, 'translate_' + k.replace('-', '_'), None)
+                self, 'translate_' + self._normalize_method_name(k), None)
             if translation_method:
                 translation_method(translated_metadata, v)
             elif k in self.mapping:
                 # if there is no method, but the key is known from the
                 # crosswalk table
                 codemeta_key = self.mapping[k]
 
                 # if there is a normalization method, use it on the value
                 normalization_method = getattr(
-                    self, 'normalize_' + k.replace('-', '_'), None)
+                    self, 'normalize_' + self._normalize_method_name(k), None)
                 if normalization_method:
                     v = normalization_method(v)
                 elif k in self.string_fields and isinstance(v, str):
                     pass
                 elif k in self.string_fields and isinstance(v, list):
                     v = [x for x in v if isinstance(x, str)]
                 else:
                     continue
 
                 # set the translation metadata with the normalized value
                 if codemeta_key in translated_metadata:
                     translated_metadata[codemeta_key] = merge_values(
                         translated_metadata[codemeta_key], v)
                 else:
                     translated_metadata[codemeta_key] = v
         if normalize:
             return self.normalize_translation(translated_metadata)
         else:
             return translated_metadata
 
 
 class JsonMapping(DictMapping, SingleFileMapping):
     """Base class for all mappings that use a JSON file as input."""
 
     def translate(self, raw_content):
         """
         Translates content by parsing content from a bytestring containing
         json data and translating with the appropriate mapping
 
         Args:
             raw_content (bytes): raw content to translate
 
         Returns:
             dict: translated metadata in json-friendly form needed for
             the indexer
 
         """
         try:
             raw_content = raw_content.decode()
         except UnicodeDecodeError:
             self.log.warning('Error unidecoding from %s', self.log_suffix)
             return
         try:
             content_dict = json.loads(raw_content)
         except json.JSONDecodeError:
             self.log.warning('Error unjsoning from %s', self.log_suffix)
             return
         if isinstance(content_dict, dict):
             return self._translate_dict(content_dict)
 
 
 @register_mapping
 class NpmMapping(JsonMapping):
     """
     dedicated class for NPM (package.json) mapping and translation
     """
     name = 'npm'
     mapping = CROSSWALK_TABLE['NodeJS']
     filename = b'package.json'
     string_fields = ['name', 'version', 'homepage', 'description', 'email']
 
     _schema_shortcuts = {
             'github': 'git+https://github.com/%s.git',
             'gist': 'git+https://gist.github.com/%s.git',
             'gitlab': 'git+https://gitlab.com/%s.git',
             # Bitbucket supports both hg and git, and the shortcut does not
             # tell which one to use.
             # 'bitbucket': 'https://bitbucket.org/',
             }
 
     def normalize_repository(self, d):
         """https://docs.npmjs.com/files/package.json#repository
 
         >>> NpmMapping().normalize_repository({
         ...     'type': 'git',
         ...     'url': 'https://example.org/foo.git'
         ... })
         {'@id': 'git+https://example.org/foo.git'}
         >>> NpmMapping().normalize_repository(
         ...     'gitlab:foo/bar')
         {'@id': 'git+https://gitlab.com/foo/bar.git'}
         >>> NpmMapping().normalize_repository(
         ...     'foo/bar')
         {'@id': 'git+https://github.com/foo/bar.git'}
         """
         if isinstance(d, dict) and isinstance(d.get('type'), str) \
                 and isinstance(d.get('url'), str):
             url = '{type}+{url}'.format(**d)
         elif isinstance(d, str):
             if '://' in d:
                 url = d
             elif ':' in d:
                 (schema, rest) = d.split(':', 1)
                 if schema in self._schema_shortcuts:
                     url = self._schema_shortcuts[schema] % rest
                 else:
                     return None
             else:
                 url = self._schema_shortcuts['github'] % d
 
         else:
             return None
 
         return {'@id': url}
 
     def normalize_bugs(self, d):
         """https://docs.npmjs.com/files/package.json#bugs
 
         >>> NpmMapping().normalize_bugs({
         ...     'url': 'https://example.org/bugs/',
         ...     'email': 'bugs@example.org'
         ... })
         {'@id': 'https://example.org/bugs/'}
         >>> NpmMapping().normalize_bugs(
         ...     'https://example.org/bugs/')
         {'@id': 'https://example.org/bugs/'}
         """
         if isinstance(d, dict) and isinstance(d.get('url'), str):
             return {'@id': d['url']}
         elif isinstance(d, str):
             return {'@id': d}
         else:
             return None
 
     _parse_author = re.compile(r'^ *'
                                r'(?P<name>.*?)'
                                r'( +<(?P<email>.*)>)?'
                                r'( +\((?P<url>.*)\))?'
                                r' *$')
 
     def normalize_author(self, d):
         """https://docs.npmjs.com/files/package.json#people-fields-author-contributors'
 
         >>> from pprint import pprint
         >>> pprint(NpmMapping().normalize_author({
         ...     'name': 'John Doe',
         ...     'email': 'john.doe@example.org',
         ...     'url': 'https://example.org/~john.doe',
         ... }))
         {'@list': [{'@type': 'http://schema.org/Person',
                     'http://schema.org/email': 'john.doe@example.org',
                     'http://schema.org/name': 'John Doe',
                     'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]}
         >>> pprint(NpmMapping().normalize_author(
         ...     'John Doe <john.doe@example.org> (https://example.org/~john.doe)'
         ... ))
         {'@list': [{'@type': 'http://schema.org/Person',
                     'http://schema.org/email': 'john.doe@example.org',
                     'http://schema.org/name': 'John Doe',
                     'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]}
         """ # noqa
         author = {'@type': SCHEMA_URI+'Person'}
         if isinstance(d, dict):
             name = d.get('name', None)
             email = d.get('email', None)
             url = d.get('url', None)
         elif isinstance(d, str):
             match = self._parse_author.match(d)
             name = match.group('name')
             email = match.group('email')
             url = match.group('url')
         else:
             return None
         if name and isinstance(name, str):
             author[SCHEMA_URI+'name'] = name
         if email and isinstance(email, str):
             author[SCHEMA_URI+'email'] = email
         if url and isinstance(url, str):
             author[SCHEMA_URI+'url'] = {'@id': url}
         return {"@list": [author]}
 
     def normalize_license(self, s):
         """https://docs.npmjs.com/files/package.json#license
 
         >>> NpmMapping().normalize_license('MIT')
         {'@id': 'https://spdx.org/licenses/MIT'}
         """
         if isinstance(s, str):
             return {"@id": "https://spdx.org/licenses/" + s}
         else:
             return None
 
     def normalize_homepage(self, s):
         """https://docs.npmjs.com/files/package.json#homepage
 
         >>> NpmMapping().normalize_homepage('https://example.org/~john.doe')
         {'@id': 'https://example.org/~john.doe'}
         """
         if isinstance(s, str):
             return {"@id": s}
 
     def normalize_keywords(self, l):
         """https://docs.npmjs.com/files/package.json#homepage
 
         >>> NpmMapping().normalize_keywords(['foo', 'bar'])
         ['foo', 'bar']
         """
         if isinstance(l, list):
             return [x for x in l if isinstance(x, str)]
 
 
 @register_mapping
 class CodemetaMapping(SingleFileMapping):
     """
     dedicated class for CodeMeta (codemeta.json) mapping and translation
     """
     name = 'codemeta'
     filename = b'codemeta.json'
-    string_fields = ['name', 'version', 'url', 'description', 'email']
+    string_fields = None
+
+    @classmethod
+    def supported_terms(cls):
+        return [term for term in CODEMETA_TERMS if not term.startswith('@')]
 
     def translate(self, content):
         try:
             return self.normalize_translation(expand(
                 json.loads(content.decode())))
         except Exception:
             return None
 
 
 @register_mapping
 class MavenMapping(DictMapping, SingleFileMapping):
     """
     dedicated class for Maven (pom.xml) mapping and translation
     """
     name = 'maven'
     filename = b'pom.xml'
     mapping = CROSSWALK_TABLE['Java (Maven)']
     string_fields = ['name', 'version', 'description', 'email']
 
     def translate(self, content):
         try:
             d = xmltodict.parse(content).get('project') or {}
         except xml.parsers.expat.ExpatError:
             self.log.warning('Error parsing XML from %s', self.log_suffix)
             return None
         except UnicodeDecodeError:
             self.log.warning('Error unidecoding XML from %s', self.log_suffix)
             return None
         except (LookupError, ValueError):
             # unknown encoding or multi-byte encoding
             self.log.warning('Error detecting XML encoding from %s',
                              self.log_suffix)
             return None
         metadata = self._translate_dict(d, normalize=False)
         metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
         metadata[SCHEMA_URI+'license'] = self.parse_licenses(d)
         return self.normalize_translation(metadata)
 
     _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'}
 
     def parse_repositories(self, d):
         """https://maven.apache.org/pom.html#Repositories
 
         >>> import xmltodict
         >>> from pprint import pprint
         >>> d = xmltodict.parse('''
         ... <repositories>
         ...   <repository>
         ...     <id>codehausSnapshots</id>
         ...     <name>Codehaus Snapshots</name>
         ...     <url>http://snapshots.maven.codehaus.org/maven2</url>
         ...     <layout>default</layout>
         ...   </repository>
         ... </repositories>
         ... ''')
         >>> MavenMapping().parse_repositories(d)
         """
         repositories = d.get('repositories')
         if not repositories:
             results = [self.parse_repository(d, self._default_repository)]
         elif isinstance(repositories, dict):
             repositories = repositories.get('repository') or []
             if not isinstance(repositories, list):
                 repositories = [repositories]
             results = [self.parse_repository(d, repo)
                        for repo in repositories]
         else:
             results = []
         return [res for res in results if res] or None
 
     def parse_repository(self, d, repo):
         if not isinstance(repo, dict):
             return
         if repo.get('layout', 'default') != 'default':
             return  # TODO ?
         url = repo.get('url')
         group_id = d.get('groupId')
         artifact_id = d.get('artifactId')
         if (isinstance(url, str) and isinstance(group_id, str)
                 and isinstance(artifact_id, str)):
             repo = os.path.join(url, *group_id.split('.'), artifact_id)
             return {"@id": repo}
 
     def normalize_groupId(self, id_):
         """https://maven.apache.org/pom.html#Maven_Coordinates
 
         >>> MavenMapping().normalize_groupId('org.example')
         {'@id': 'org.example'}
         """
         if isinstance(id_, str):
             return {"@id": id_}
 
     def parse_licenses(self, d):
         """https://maven.apache.org/pom.html#Licenses
 
         >>> import xmltodict
         >>> import json
         >>> d = xmltodict.parse('''
         ... <licenses>
         ...   <license>
         ...     <name>Apache License, Version 2.0</name>
         ...     <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
         ...   </license>
         ... </licenses>
         ... ''')
         >>> print(json.dumps(d, indent=4))
         {
             "licenses": {
                 "license": {
                     "name": "Apache License, Version 2.0",
                     "url": "https://www.apache.org/licenses/LICENSE-2.0.txt"
                 }
             }
         }
         >>> MavenMapping().parse_licenses(d)
         [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'}]
 
         or, if there are more than one license:
 
         >>> import xmltodict
         >>> from pprint import pprint
         >>> d = xmltodict.parse('''
         ... <licenses>
         ...   <license>
         ...     <name>Apache License, Version 2.0</name>
         ...     <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
         ...   </license>
         ...   <license>
         ...     <name>MIT License</name>
         ...     <url>https://opensource.org/licenses/MIT</url>
         ...   </license>
         ... </licenses>
         ... ''')
         >>> pprint(MavenMapping().parse_licenses(d))
         [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'},
          {'@id': 'https://opensource.org/licenses/MIT'}]
         """
 
         licenses = d.get('licenses')
         if not isinstance(licenses, dict):
             return
         licenses = licenses.get('license')
         if isinstance(licenses, dict):
             licenses = [licenses]
         elif not isinstance(licenses, list):
             return
         return [{"@id": license['url']}
                 for license in licenses
                 if isinstance(license, dict)
                 and isinstance(license.get('url'), str)] or None
 
 
 _normalize_pkginfo_key = str.lower
 
 
 class LinebreakPreservingEmailPolicy(email.policy.EmailPolicy):
     def header_fetch_parse(self, name, value):
         if hasattr(value, 'name'):
             return value
         value = value.replace('\n        ', '\n')
         return self.header_factory(name, value)
 
 
 @register_mapping
 class PythonPkginfoMapping(DictMapping, SingleFileMapping):
     """Dedicated class for Python's PKG-INFO mapping and translation.
 
     https://www.python.org/dev/peps/pep-0314/"""
     name = 'pkg-info'
     filename = b'PKG-INFO'
     mapping = {_normalize_pkginfo_key(k): v
                for (k, v) in CROSSWALK_TABLE['Python PKG-INFO'].items()}
     string_fields = ['name', 'version', 'description', 'summary',
                      'author', 'author-email']
 
     _parser = email.parser.BytesHeaderParser(
         policy=LinebreakPreservingEmailPolicy())
 
     def translate(self, content):
         msg = self._parser.parsebytes(content)
         d = {}
         for (key, value) in msg.items():
             key = _normalize_pkginfo_key(key)
             if value != 'UNKNOWN':
                 d.setdefault(key, []).append(value)
         metadata = self._translate_dict(d, normalize=False)
         if SCHEMA_URI+'author' in metadata or SCHEMA_URI+'email' in metadata:
             metadata[SCHEMA_URI+'author'] = {
                 '@list': [{
                     '@type': SCHEMA_URI+'Person',
                     SCHEMA_URI+'name':
                         metadata.pop(SCHEMA_URI+'author', [None])[0],
                     SCHEMA_URI+'email':
                         metadata.pop(SCHEMA_URI+'email', [None])[0],
                 }]
             }
         return self.normalize_translation(metadata)
 
     def normalize_home_page(self, urls):
         return [{'@id': url} for url in urls]
 
     def normalize_license(self, licenses):
         return [{'@id': license} for license in licenses]
 
 
 @register_mapping
 class GemspecMapping(DictMapping):
     name = 'gemspec'
     mapping = CROSSWALK_TABLE['Ruby Gem']
     string_fields = ['name', 'version', 'description', 'summary', 'email']
 
     _re_spec_new = re.compile(r'.*Gem::Specification.new +(do|\{) +\|.*\|.*')
     _re_spec_entry = re.compile(r'\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)')
 
     @classmethod
     def detect_metadata_files(cls, file_entries):
         for entry in file_entries:
             if entry['name'].endswith(b'.gemspec'):
                 return [entry['sha1']]
         return []
 
     def translate(self, raw_content):
         try:
             raw_content = raw_content.decode()
         except UnicodeDecodeError:
             self.log.warning('Error unidecoding from %s', self.log_suffix)
             return
 
         # Skip lines before 'Gem::Specification.new'
         lines = itertools.dropwhile(
             lambda x: not self._re_spec_new.match(x),
             raw_content.split('\n'))
 
         try:
             next(lines)  # Consume 'Gem::Specification.new'
         except StopIteration:
             self.log.warning('Could not find Gem::Specification in %s',
                              self.log_suffix)
             return
 
         content_dict = {}
         for line in lines:
             match = self._re_spec_entry.match(line)
             if match:
                 value = self.eval_ruby_expression(match.group('expr'))
                 if value:
                     content_dict[match.group('key')] = value
         return self._translate_dict(content_dict)
 
     def eval_ruby_expression(self, expr):
         """Very simple evaluator of Ruby expressions.
 
         >>> GemspecMapping().eval_ruby_expression('"Foo bar"')
         'Foo bar'
         >>> GemspecMapping().eval_ruby_expression("'Foo bar'")
         'Foo bar'
         >>> GemspecMapping().eval_ruby_expression("['Foo', 'bar']")
         ['Foo', 'bar']
         >>> GemspecMapping().eval_ruby_expression("'Foo bar'.freeze")
         'Foo bar'
         >>> GemspecMapping().eval_ruby_expression( \
                 "['Foo'.freeze, 'bar'.freeze]")
         ['Foo', 'bar']
         """
         def evaluator(node):
             if isinstance(node, ast.Str):
                 return node.s
             elif isinstance(node, ast.List):
                 res = []
                 for element in node.elts:
                     val = evaluator(element)
                     if not val:
                         return
                     res.append(val)
                 return res
 
         expr = expr.replace('.freeze', '')
         try:
             # We're parsing Ruby expressions here, but Python's
             # ast.parse works for very simple Ruby expressions
             # (mainly strings delimited with " or ', and lists
             # of such strings).
             tree = ast.parse(expr, mode='eval')
         except (SyntaxError, ValueError):
             return
         if isinstance(tree, ast.Expression):
             return evaluator(tree.body)
 
     def normalize_homepage(self, s):
         if isinstance(s, str):
             return {"@id": s}
 
     def normalize_license(self, s):
         if isinstance(s, str):
             return [{"@id": "https://spdx.org/licenses/" + s}]
 
     def normalize_licenses(self, licenses):
         if isinstance(licenses, list):
             return [{"@id": "https://spdx.org/licenses/" + license}
                     for license in licenses
                     if isinstance(license, str)]
 
     def normalize_author(self, author):
         if isinstance(author, str):
             return {"@list": [author]}
 
     def normalize_authors(self, authors):
         if isinstance(authors, list):
             return {"@list": [author for author in authors
                               if isinstance(author, str)]}
 
 
 @click.command()
 @click.argument('mapping_name')
 @click.argument('file_name')
 def main(mapping_name, file_name):
     from pprint import pprint
     with open(file_name, 'rb') as fd:
         file_content = fd.read()
     res = MAPPINGS[mapping_name]().translate(file_content)
     pprint(res)
 
 
 if __name__ == '__main__':
     main()
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
index d14b186..6a9a31e 100644
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -1,289 +1,315 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from functools import reduce
+import re
 import tempfile
 from unittest.mock import patch
 
 from click.testing import CliRunner
 
 from swh.model.hashutil import hash_to_bytes
 
 from swh.indexer.cli import cli
 
 
 CLI_CONFIG = '''
 scheduler:
     cls: foo
     args: {}
 storage:
     cls: memory
     args: {}
 indexer_storage:
     cls: memory
     args: {}
 '''
 
 
 def fill_idx_storage(idx_storage, nb_rows):
     tools = [
         {
             'tool_name': 'tool %d' % i,
             'tool_version': '0.0.1',
             'tool_configuration': {},
         }
         for i in range(2)
     ]
     tools = idx_storage.indexer_configuration_add(tools)
 
     origin_metadata = [
         {
             'origin_id': origin_id,
             'from_revision': hash_to_bytes('abcd{:0>4}'.format(origin_id)),
             'indexer_configuration_id': tools[origin_id % 2]['id'],
             'metadata': {'name': 'origin %d' % origin_id},
             'mappings': ['mapping%d' % (origin_id % 10)]
         }
         for origin_id in range(nb_rows)
     ]
     revision_metadata = [
         {
             'id': hash_to_bytes('abcd{:0>4}'.format(origin_id)),
             'indexer_configuration_id': tools[origin_id % 2]['id'],
             'metadata': {'name': 'origin %d' % origin_id},
             'mappings': ['mapping%d' % (origin_id % 10)]
         }
         for origin_id in range(nb_rows)
     ]
 
     idx_storage.revision_metadata_add(revision_metadata)
     idx_storage.origin_intrinsic_metadata_add(origin_metadata)
 
     return [tool['id'] for tool in tools]
 
 
 def _origins_in_task_args(tasks):
     """Returns the set of origins contained in the arguments of the
     provided tasks (assumed to be of type indexer_origin_metadata)."""
     return reduce(
         set.union,
         (set(task['arguments']['args'][0]) for task in tasks),
         set()
     )
 
 
 def _assert_tasks_for_origins(tasks, origins):
     expected_kwargs = {"policy_update": "update-dups", "parse_ids": False}
     assert {task['type'] for task in tasks} == {'indexer_origin_metadata'}
     assert all(len(task['arguments']['args']) == 1 for task in tasks)
     assert all(task['arguments']['kwargs'] == expected_kwargs
                for task in tasks)
     assert _origins_in_task_args(tasks) == set(origins)
 
 
 def invoke(scheduler, catch_exceptions, args):
     runner = CliRunner()
     with patch('swh.indexer.cli.get_scheduler') as get_scheduler_mock, \
             tempfile.NamedTemporaryFile('a', suffix='.yml') as config_fd:
         config_fd.write(CLI_CONFIG)
         config_fd.seek(0)
         get_scheduler_mock.return_value = scheduler
         result = runner.invoke(cli, ['-C' + config_fd.name] + args)
     if not catch_exceptions and result.exception:
         print(result.output)
         raise result.exception
     return result
 
 
 def test_mapping_list(indexer_scheduler):
     result = invoke(indexer_scheduler, False, [
         'mapping', 'list',
     ])
     expected_output = '\n'.join([
         'codemeta', 'gemspec', 'maven', 'npm', 'pkg-info', '',
     ])
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
 
+def test_mapping_list_terms(indexer_scheduler):
+    result = invoke(indexer_scheduler, False, [
+        'mapping', 'list-terms',
+    ])
+    assert result.exit_code == 0, result.output
+    assert re.search(r'http://schema.org/url:\n.*npm', result.output)
+    assert re.search(r'http://schema.org/url:\n.*codemeta', result.output)
+    assert re.search(
+        r'https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta',
+        result.output)
+
+
+def test_mapping_list_terms_exclude(indexer_scheduler):
+    result = invoke(indexer_scheduler, False, [
+        'mapping', 'list-terms',
+        '--exclude-mapping', 'codemeta'
+    ])
+    assert result.exit_code == 0, result.output
+    assert re.search(r'http://schema.org/url:\n.*npm', result.output)
+    assert not re.search(r'http://schema.org/url:\n.*codemeta', result.output)
+    assert not re.search(
+        r'https://codemeta.github.io/terms/developmentStatus:\n\tcodemeta',
+        result.output)
+
+
 @patch('swh.indexer.cli.TASK_BATCH_SIZE', 3)
 def test_origin_metadata_reindex_empty_db(
         indexer_scheduler, idx_storage, storage):
     result = invoke(indexer_scheduler, False, [
         'schedule', 'reindex_origin_metadata',
     ])
     expected_output = (
         'Nothing to do (no origin metadata matched the criteria).\n'
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
     tasks = indexer_scheduler.search_tasks()
     assert len(tasks) == 0
 
 
 @patch('swh.indexer.cli.TASK_BATCH_SIZE', 3)
 def test_origin_metadata_reindex_divisor(
         indexer_scheduler, idx_storage, storage):
     """Tests the re-indexing when origin_batch_size*task_batch_size is a
     divisor of nb_origins."""
     fill_idx_storage(idx_storage, 90)
 
     result = invoke(indexer_scheduler, False, [
         'schedule', 'reindex_origin_metadata',
     ])
 
     # Check the output
     expected_output = (
         'Scheduled 3 tasks (30 origins).\n'
         'Scheduled 6 tasks (60 origins).\n'
         'Scheduled 9 tasks (90 origins).\n'
         'Done.\n'
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
     # Check scheduled tasks
     tasks = indexer_scheduler.search_tasks()
     assert len(tasks) == 9
     _assert_tasks_for_origins(tasks, range(90))
 
 
 @patch('swh.indexer.cli.TASK_BATCH_SIZE', 3)
 def test_origin_metadata_reindex_dry_run(
         indexer_scheduler, idx_storage, storage):
     """Tests the re-indexing when origin_batch_size*task_batch_size is a
     divisor of nb_origins."""
     fill_idx_storage(idx_storage, 90)
 
     result = invoke(indexer_scheduler, False, [
         'schedule', '--dry-run', 'reindex_origin_metadata',
     ])
 
     # Check the output
     expected_output = (
         'Scheduled 3 tasks (30 origins).\n'
         'Scheduled 6 tasks (60 origins).\n'
         'Scheduled 9 tasks (90 origins).\n'
         'Done.\n'
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
     # Check scheduled tasks
     tasks = indexer_scheduler.search_tasks()
     assert len(tasks) == 0
 
 
 @patch('swh.indexer.cli.TASK_BATCH_SIZE', 3)
 def test_origin_metadata_reindex_nondivisor(
         indexer_scheduler, idx_storage, storage):
     """Tests the re-indexing when neither origin_batch_size or
     task_batch_size is a divisor of nb_origins."""
     fill_idx_storage(idx_storage, 70)
 
     result = invoke(indexer_scheduler, False, [
         'schedule', 'reindex_origin_metadata',
         '--batch-size', '20',
     ])
 
     # Check the output
     expected_output = (
         'Scheduled 3 tasks (60 origins).\n'
         'Scheduled 4 tasks (70 origins).\n'
         'Done.\n'
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
     # Check scheduled tasks
     tasks = indexer_scheduler.search_tasks()
     assert len(tasks) == 4
     _assert_tasks_for_origins(tasks, range(70))
 
 
 @patch('swh.indexer.cli.TASK_BATCH_SIZE', 3)
 def test_origin_metadata_reindex_filter_one_mapping(
         indexer_scheduler, idx_storage, storage):
     """Tests the re-indexing when origin_batch_size*task_batch_size is a
     divisor of nb_origins."""
     fill_idx_storage(idx_storage, 110)
 
     result = invoke(indexer_scheduler, False, [
         'schedule', 'reindex_origin_metadata',
         '--mapping', 'mapping1',
     ])
 
     # Check the output
     expected_output = (
         'Scheduled 2 tasks (11 origins).\n'
         'Done.\n'
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
     # Check scheduled tasks
     tasks = indexer_scheduler.search_tasks()
     assert len(tasks) == 2
     _assert_tasks_for_origins(
         tasks,
         [1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101])
 
 
 @patch('swh.indexer.cli.TASK_BATCH_SIZE', 3)
 def test_origin_metadata_reindex_filter_two_mappings(
         indexer_scheduler, idx_storage, storage):
     """Tests the re-indexing when origin_batch_size*task_batch_size is a
     divisor of nb_origins."""
     fill_idx_storage(idx_storage, 110)
 
     result = invoke(indexer_scheduler, False, [
         'schedule', 'reindex_origin_metadata',
         '--mapping', 'mapping1', '--mapping', 'mapping2',
     ])
 
     # Check the output
     expected_output = (
         'Scheduled 3 tasks (22 origins).\n'
         'Done.\n'
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
     # Check scheduled tasks
     tasks = indexer_scheduler.search_tasks()
     assert len(tasks) == 3
     _assert_tasks_for_origins(
         tasks,
         [1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101,
          2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 102])
 
 
 @patch('swh.indexer.cli.TASK_BATCH_SIZE', 3)
 def test_origin_metadata_reindex_filter_one_tool(
         indexer_scheduler, idx_storage, storage):
     """Tests the re-indexing when origin_batch_size*task_batch_size is a
     divisor of nb_origins."""
     tool_ids = fill_idx_storage(idx_storage, 110)
 
     result = invoke(indexer_scheduler, False, [
         'schedule', 'reindex_origin_metadata',
         '--tool-id', str(tool_ids[0]),
     ])
 
     # Check the output
     expected_output = (
         'Scheduled 3 tasks (30 origins).\n'
         'Scheduled 6 tasks (55 origins).\n'
         'Done.\n'
     )
     assert result.exit_code == 0, result.output
     assert result.output == expected_output
 
     # Check scheduled tasks
     tasks = indexer_scheduler.search_tasks()
     assert len(tasks) == 6
     _assert_tasks_for_origins(
         tasks,
         [x*2 for x in range(55)])
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index cba4675..902a483 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,1136 +1,1136 @@
 # Copyright (C) 2017-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import json
 import unittest
 
 from hypothesis import given, strategies, settings, HealthCheck
 import xmltodict
 
 from swh.model.hashutil import hash_to_bytes
 
-from swh.indexer.codemeta import CODEMETA_KEYS
+from swh.indexer.codemeta import CODEMETA_TERMS
 from swh.indexer.metadata_dictionary import (
     CROSSWALK_TABLE, MAPPINGS, merge_values)
 from swh.indexer.metadata_detector import (
     detect_metadata, extract_minimal_metadata_dict
 )
 from swh.indexer.metadata import (
     ContentMetadataIndexer, RevisionMetadataIndexer
 )
 
 from .utils import (
     BASE_TEST_CONFIG, fill_obj_storage, fill_storage,
     YARN_PARSER_METADATA, json_document_strategy
 )
 
 
 TRANSLATOR_TOOL = {
     'name': 'swh-metadata-translator',
     'version': '0.0.2',
     'configuration': {
         'type': 'local',
         'context': 'NpmMapping'
     }
 }
 
 
 class ContentMetadataTestIndexer(ContentMetadataIndexer):
     """Specific Metadata whose configuration is enough to satisfy the
        indexing tests.
     """
     def parse_config_file(self, *args, **kwargs):
         assert False, 'should not be called; the rev indexer configures it.'
 
 
 REVISION_METADATA_CONFIG = {
     **BASE_TEST_CONFIG,
     'tools': TRANSLATOR_TOOL,
 }
 
 
 class Metadata(unittest.TestCase):
     """
     Tests metadata_mock_tool tool for Metadata detection
     """
     def setUp(self):
         """
         shows the entire diff in the results
         """
         self.maxDiff = None
         self.npm_mapping = MAPPINGS['NpmMapping']()
         self.codemeta_mapping = MAPPINGS['CodemetaMapping']()
         self.maven_mapping = MAPPINGS['MavenMapping']()
         self.pkginfo_mapping = MAPPINGS['PythonPkginfoMapping']()
         self.gemspec_mapping = MAPPINGS['GemspecMapping']()
 
     def test_crosstable(self):
         self.assertEqual(CROSSWALK_TABLE['NodeJS'], {
             'repository': 'http://schema.org/codeRepository',
             'os': 'http://schema.org/operatingSystem',
             'cpu': 'http://schema.org/processorRequirements',
             'engines':
                 'http://schema.org/processorRequirements',
             'author': 'http://schema.org/author',
             'author.email': 'http://schema.org/email',
             'author.name': 'http://schema.org/name',
             'contributor': 'http://schema.org/contributor',
             'keywords': 'http://schema.org/keywords',
             'license': 'http://schema.org/license',
             'version': 'http://schema.org/version',
             'description': 'http://schema.org/description',
             'name': 'http://schema.org/name',
             'bugs': 'https://codemeta.github.io/terms/issueTracker',
             'homepage': 'http://schema.org/url'
         })
 
     def test_merge_values(self):
         self.assertEqual(
             merge_values('a', 'b'),
             ['a', 'b'])
         self.assertEqual(
             merge_values(['a', 'b'], 'c'),
             ['a', 'b', 'c'])
         self.assertEqual(
             merge_values('a', ['b', 'c']),
             ['a', 'b', 'c'])
 
         self.assertEqual(
             merge_values({'@list': ['a']}, {'@list': ['b']}),
             {'@list': ['a', 'b']})
         self.assertEqual(
             merge_values({'@list': ['a', 'b']}, {'@list': ['c']}),
             {'@list': ['a', 'b', 'c']})
 
         with self.assertRaises(ValueError):
             merge_values({'@list': ['a']}, 'b')
         with self.assertRaises(ValueError):
             merge_values('a', {'@list': ['b']})
         with self.assertRaises(ValueError):
             merge_values({'@list': ['a']}, ['b'])
         with self.assertRaises(ValueError):
             merge_values(['a'], {'@list': ['b']})
 
         self.assertEqual(
             merge_values('a', None),
             'a')
         self.assertEqual(
             merge_values(['a', 'b'], None),
             ['a', 'b'])
         self.assertEqual(
             merge_values(None, ['b', 'c']),
             ['b', 'c'])
         self.assertEqual(
             merge_values({'@list': ['a']}, None),
             {'@list': ['a']})
         self.assertEqual(
             merge_values(None, {'@list': ['a']}),
             {'@list': ['a']})
 
     def test_compute_metadata_none(self):
         """
         testing content empty content is empty
         should return None
         """
         # given
         content = b""
 
         # None if no metadata was found or an error occurred
         declared_metadata = None
         # when
         result = self.npm_mapping.translate(content)
         # then
         self.assertEqual(declared_metadata, result)
 
     def test_compute_metadata_npm(self):
         """
         testing only computation of metadata with hard_mapping_npm
         """
         # given
         content = b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.2",
                 "description": "Simple package.json test for indexer",
                   "repository": {
                     "type": "git",
                     "url": "https://github.com/moranegg/metadata_test"
                 },
                 "author": {
                     "email": "moranegg@example.com",
                     "name": "Morane G"
                 }
             }
         """
         declared_metadata = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'test_metadata',
             'version': '0.0.2',
             'description': 'Simple package.json test for indexer',
             'codeRepository':
                 'git+https://github.com/moranegg/metadata_test',
             'author': [{
                 'type': 'Person',
                 'name': 'Morane G',
                 'email': 'moranegg@example.com',
             }],
         }
 
         # when
         result = self.npm_mapping.translate(content)
         # then
         self.assertEqual(declared_metadata, result)
 
     def test_extract_minimal_metadata_dict(self):
         """
         Test the creation of a coherent minimal metadata set
         """
         # given
         metadata_list = [{
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'test_1',
             'version': '0.0.2',
             'description': 'Simple package.json test for indexer',
             'codeRepository':
                 'git+https://github.com/moranegg/metadata_test',
         }, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'test_0_1',
             'version': '0.0.2',
             'description': 'Simple package.json test for indexer',
             'codeRepository':
                 'git+https://github.com/moranegg/metadata_test'
         }, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'test_metadata',
             'version': '0.0.2',
             'author': 'moranegg',
         }]
 
         # when
         results = extract_minimal_metadata_dict(metadata_list)
 
         # then
         expected_results = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             "version": '0.0.2',
             "description": 'Simple package.json test for indexer',
             "name": ['test_1', 'test_0_1', 'test_metadata'],
             "author": ['moranegg'],
             "codeRepository":
                 'git+https://github.com/moranegg/metadata_test',
         }
         self.assertEqual(expected_results, results)
 
     def test_index_content_metadata_npm(self):
         """
         testing NPM with package.json
         - one sha1 uses a file that can't be translated to metadata and
           should return None in the translated metadata
         """
         # given
         sha1s = [
             hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'),
             hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607'),
             hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069'),
         ]
         # this metadata indexer computes only metadata for package.json
         # in npm context with a hard mapping
         config = BASE_TEST_CONFIG.copy()
         config['tools'] = [TRANSLATOR_TOOL]
         metadata_indexer = ContentMetadataTestIndexer(config=config)
         fill_obj_storage(metadata_indexer.objstorage)
         fill_storage(metadata_indexer.storage)
 
         # when
         metadata_indexer.run(sha1s, policy_update='ignore-dups')
         results = list(metadata_indexer.idx_storage.content_metadata_get(
             sha1s))
 
         expected_results = [{
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                 'type': 'SoftwareSourceCode',
                 'codeRepository':
                     'git+https://github.com/moranegg/metadata_test',
                 'description': 'Simple package.json test for indexer',
                 'name': 'test_metadata',
                 'version': '0.0.1'
             },
             'id': hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'),
             }, {
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                 'type': 'SoftwareSourceCode',
                 'issueTracker':
                     'https://github.com/npm/npm/issues',
                 'author': [{
                     'type': 'Person',
                     'name': 'Isaac Z. Schlueter',
                     'email': 'i@izs.me',
                     'url': 'http://blog.izs.me',
                 }],
                 'codeRepository':
                     'git+https://github.com/npm/npm',
                 'description': 'a package manager for JavaScript',
                 'license': 'https://spdx.org/licenses/Artistic-2.0',
                 'version': '5.0.3',
                 'name': 'npm',
                 'keywords': [
                     'install',
                     'modules',
                     'package manager',
                     'package.json'
                 ],
                 'url': 'https://docs.npmjs.com/'
             },
             'id': hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607')
         }]
 
         for result in results:
             del result['tool']
 
         # The assertion below returns False sometimes because of nested lists
         self.assertEqual(expected_results, results)
 
     def test_npm_bugs_normalization(self):
         # valid dictionary
         package_json = b"""{
             "name": "foo",
             "bugs": {
                 "url": "https://github.com/owner/project/issues",
                 "email": "foo@example.com"
             }
         }"""
         result = self.npm_mapping.translate(package_json)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'foo',
             'issueTracker': 'https://github.com/owner/project/issues',
             'type': 'SoftwareSourceCode',
         })
 
         # "invalid" dictionary
         package_json = b"""{
             "name": "foo",
             "bugs": {
                 "email": "foo@example.com"
             }
         }"""
         result = self.npm_mapping.translate(package_json)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'foo',
             'type': 'SoftwareSourceCode',
         })
 
         # string
         package_json = b"""{
             "name": "foo",
             "bugs": "https://github.com/owner/project/issues"
         }"""
         result = self.npm_mapping.translate(package_json)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'foo',
             'issueTracker': 'https://github.com/owner/project/issues',
             'type': 'SoftwareSourceCode',
         })
 
     def test_npm_repository_normalization(self):
         # normal
         package_json = b"""{
             "name": "foo",
             "repository": {
                 "type" : "git",
                 "url" : "https://github.com/npm/cli.git"
             }
         }"""
         result = self.npm_mapping.translate(package_json)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'foo',
             'codeRepository': 'git+https://github.com/npm/cli.git',
             'type': 'SoftwareSourceCode',
         })
 
         # missing url
         package_json = b"""{
             "name": "foo",
             "repository": {
                 "type" : "git"
             }
         }"""
         result = self.npm_mapping.translate(package_json)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'foo',
             'type': 'SoftwareSourceCode',
         })
 
         # github shortcut
         package_json = b"""{
             "name": "foo",
             "repository": "github:npm/cli"
         }"""
         result = self.npm_mapping.translate(package_json)
         expected_result = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'foo',
             'codeRepository': 'git+https://github.com/npm/cli.git',
             'type': 'SoftwareSourceCode',
         }
         self.assertEqual(result, expected_result)
 
         # github shortshortcut
         package_json = b"""{
             "name": "foo",
             "repository": "npm/cli"
         }"""
         result = self.npm_mapping.translate(package_json)
         self.assertEqual(result, expected_result)
 
         # gitlab shortcut
         package_json = b"""{
             "name": "foo",
             "repository": "gitlab:user/repo"
         }"""
         result = self.npm_mapping.translate(package_json)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'name': 'foo',
             'codeRepository': 'git+https://gitlab.com/user/repo.git',
             'type': 'SoftwareSourceCode',
         })
 
     def test_detect_metadata_package_json(self):
         # given
         df = [{
                 'sha1_git': b'abc',
                 'name': b'index.js',
                 'target': b'abc',
                 'length': 897,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'dir_a',
                 'sha1': b'bcd'
             },
             {
                 'sha1_git': b'aab',
                 'name': b'package.json',
                 'target': b'aab',
                 'length': 712,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'dir_a',
                 'sha1': b'cde'
         }]
         # when
         results = detect_metadata(df)
 
         expected_results = {
             'NpmMapping': [
                 b'cde'
             ]
         }
         # then
         self.assertEqual(expected_results, results)
 
     def test_compute_metadata_valid_codemeta(self):
         raw_content = (
             b"""{
             "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
             "@type": "SoftwareSourceCode",
             "identifier": "CodeMeta",
             "description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.",
             "name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD",
             "codeRepository": "https://github.com/codemeta/codemeta",
             "issueTracker": "https://github.com/codemeta/codemeta/issues",
             "license": "https://spdx.org/licenses/Apache-2.0",
             "version": "2.0",
             "author": [
               {
                 "@type": "Person",
                 "givenName": "Carl",
                 "familyName": "Boettiger",
                 "email": "cboettig@gmail.com",
                 "@id": "http://orcid.org/0000-0002-1642-628X"
               },
               {
                 "@type": "Person",
                 "givenName": "Matthew B.",
                 "familyName": "Jones",
                 "email": "jones@nceas.ucsb.edu",
                 "@id": "http://orcid.org/0000-0003-0077-4738"
               }
             ],
             "maintainer": {
               "@type": "Person",
               "givenName": "Carl",
               "familyName": "Boettiger",
               "email": "cboettig@gmail.com",
               "@id": "http://orcid.org/0000-0002-1642-628X"
             },
             "contIntegration": "https://travis-ci.org/codemeta/codemeta",
             "developmentStatus": "active",
             "downloadUrl": "https://github.com/codemeta/codemeta/archive/2.0.zip",
             "funder": {
                 "@id": "https://doi.org/10.13039/100000001",
                 "@type": "Organization",
                 "name": "National Science Foundation"
             },
             "funding":"1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software",
             "keywords": [
               "metadata",
               "software"
             ],
             "version":"2.0",
             "dateCreated":"2017-06-05",
             "datePublished":"2017-06-05",
             "programmingLanguage": "JSON-LD"
           }""") # noqa
         expected_result = {
             "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
             "type": "SoftwareSourceCode",
             "identifier": "CodeMeta",
             "description":
                 "CodeMeta is a concept vocabulary that can "
                 "be used to standardize the exchange of software metadata "
                 "across repositories and organizations.",
             "name":
                 "CodeMeta: Minimal metadata schemas for science "
                 "software and code, in JSON-LD",
             "codeRepository": "https://github.com/codemeta/codemeta",
             "issueTracker": "https://github.com/codemeta/codemeta/issues",
             "license": "https://spdx.org/licenses/Apache-2.0",
             "version": "2.0",
             "author": [
               {
                 "type": "Person",
                 "givenName": "Carl",
                 "familyName": "Boettiger",
                 "email": "cboettig@gmail.com",
                 "id": "http://orcid.org/0000-0002-1642-628X"
               },
               {
                 "type": "Person",
                 "givenName": "Matthew B.",
                 "familyName": "Jones",
                 "email": "jones@nceas.ucsb.edu",
                 "id": "http://orcid.org/0000-0003-0077-4738"
               }
             ],
             "maintainer": {
               "type": "Person",
               "givenName": "Carl",
               "familyName": "Boettiger",
               "email": "cboettig@gmail.com",
               "id": "http://orcid.org/0000-0002-1642-628X"
             },
             "contIntegration": "https://travis-ci.org/codemeta/codemeta",
             "developmentStatus": "active",
             "downloadUrl":
                 "https://github.com/codemeta/codemeta/archive/2.0.zip",
             "funder": {
                 "id": "https://doi.org/10.13039/100000001",
                 "type": "Organization",
                 "name": "National Science Foundation"
             },
             "funding": "1549758; Codemeta: A Rosetta Stone for Metadata "
                 "in Scientific Software",
             "keywords": [
               "metadata",
               "software"
             ],
             "version": "2.0",
             "dateCreated": "2017-06-05",
             "datePublished": "2017-06-05",
             "programmingLanguage": "JSON-LD"
           }
         result = self.codemeta_mapping.translate(raw_content)
         self.assertEqual(result, expected_result)
 
     def test_compute_metadata_codemeta_alternate_context(self):
         raw_content = (
             b"""{
             "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld",
             "@type": "SoftwareSourceCode",
             "identifier": "CodeMeta"
         }""")  # noqa
         expected_result = {
             "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
             "type": "SoftwareSourceCode",
             "identifier": "CodeMeta",
         }
         result = self.codemeta_mapping.translate(raw_content)
         self.assertEqual(result, expected_result)
 
     def test_compute_metadata_maven(self):
         raw_content = b"""
         <project>
           <name>Maven Default Project</name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version>1.2.3</version>
           <repositories>
             <repository>
               <id>central</id>
               <name>Maven Repository Switchboard</name>
               <layout>default</layout>
               <url>http://repo1.maven.org/maven2</url>
               <snapshots>
                 <enabled>false</enabled>
               </snapshots>
             </repository>
           </repositories>
           <licenses>
             <license>
               <name>Apache License, Version 2.0</name>
               <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
               <distribution>repo</distribution>
               <comments>A business-friendly OSS license</comments>
             </license>
           </licenses>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'Maven Default Project',
             'identifier': 'com.mycompany.app',
             'version': '1.2.3',
             'license': 'https://www.apache.org/licenses/LICENSE-2.0.txt',
             'codeRepository':
                 'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
         })
 
     def test_compute_metadata_maven_empty(self):
         raw_content = b"""
         <project>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
         })
 
     def test_compute_metadata_maven_almost_empty(self):
         raw_content = b"""
         <project>
           <foo/>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
         })
 
     def test_compute_metadata_maven_invalid_xml(self):
         expected_warning = (
             'WARNING:swh.indexer.metadata_dictionary.MavenMapping:'
             'Error parsing XML from foo')
 
         raw_content = b"""
         <project>"""
         with self.assertLogs('swh.indexer.metadata_dictionary',
                              level='WARNING') as cm:
             result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
             self.assertEqual(cm.output, [expected_warning])
         self.assertEqual(result, None)
 
         raw_content = b"""
         """
         with self.assertLogs('swh.indexer.metadata_dictionary',
                              level='WARNING') as cm:
             result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
             self.assertEqual(cm.output, [expected_warning])
         self.assertEqual(result, None)
 
     def test_compute_metadata_maven_unknown_encoding(self):
         expected_warning = (
             'WARNING:swh.indexer.metadata_dictionary.MavenMapping:'
             'Error detecting XML encoding from foo')
 
         raw_content = b"""<?xml version="1.0" encoding="foo"?>
         <project>
         </project>"""
         with self.assertLogs('swh.indexer.metadata_dictionary',
                              level='WARNING') as cm:
             result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
             self.assertEqual(cm.output, [expected_warning])
         self.assertEqual(result, None)
 
         raw_content = b"""<?xml version="1.0" encoding="UTF-7"?>
         <project>
         </project>"""
         with self.assertLogs('swh.indexer.metadata_dictionary',
                              level='WARNING') as cm:
             result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
             self.assertEqual(cm.output, [expected_warning])
         self.assertEqual(result, None)
 
     def test_compute_metadata_maven_invalid_encoding(self):
         expected_warning = (
             'WARNING:swh.indexer.metadata_dictionary.MavenMapping:'
             'Error unidecoding XML from foo')
 
         raw_content = b"""<?xml version="1.0" encoding="UTF-8"?>
         <foo\xe5ct>
         </foo>"""
         with self.assertLogs('swh.indexer.metadata_dictionary',
                              level='WARNING') as cm:
             result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
             self.assertEqual(cm.output, [expected_warning])
         self.assertEqual(result, None)
 
     def test_compute_metadata_maven_minimal(self):
         raw_content = b"""
         <project>
           <name>Maven Default Project</name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version>1.2.3</version>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'Maven Default Project',
             'identifier': 'com.mycompany.app',
             'version': '1.2.3',
             'codeRepository':
             'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
         })
 
     def test_compute_metadata_maven_empty_nodes(self):
         raw_content = b"""
         <project>
           <name>Maven Default Project</name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version>1.2.3</version>
           <repositories>
           </repositories>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'Maven Default Project',
             'identifier': 'com.mycompany.app',
             'version': '1.2.3',
             'codeRepository':
             'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
         })
 
         raw_content = b"""
         <project>
           <name>Maven Default Project</name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version></version>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'Maven Default Project',
             'identifier': 'com.mycompany.app',
             'codeRepository':
             'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
         })
 
         raw_content = b"""
         <project>
           <name></name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version>1.2.3</version>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'identifier': 'com.mycompany.app',
             'version': '1.2.3',
             'codeRepository':
             'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
         })
 
         raw_content = b"""
         <project>
           <name>Maven Default Project</name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version>1.2.3</version>
           <licenses>
           </licenses>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'Maven Default Project',
             'identifier': 'com.mycompany.app',
             'version': '1.2.3',
             'codeRepository':
             'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
         })
 
         raw_content = b"""
         <project>
           <groupId></groupId>
           <version>1.2.3</version>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'version': '1.2.3',
         })
 
     def test_compute_metadata_maven_invalid_licenses(self):
         raw_content = b"""
         <project>
           <name>Maven Default Project</name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version>1.2.3</version>
           <licenses>
             foo
           </licenses>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'Maven Default Project',
             'identifier': 'com.mycompany.app',
             'version': '1.2.3',
             'codeRepository':
             'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
         })
 
     def test_compute_metadata_maven_multiple(self):
         '''Tests when there are multiple code repos and licenses.'''
         raw_content = b"""
         <project>
           <name>Maven Default Project</name>
           <modelVersion>4.0.0</modelVersion>
           <groupId>com.mycompany.app</groupId>
           <artifactId>my-app</artifactId>
           <version>1.2.3</version>
           <repositories>
             <repository>
               <id>central</id>
               <name>Maven Repository Switchboard</name>
               <layout>default</layout>
               <url>http://repo1.maven.org/maven2</url>
               <snapshots>
                 <enabled>false</enabled>
               </snapshots>
             </repository>
             <repository>
               <id>example</id>
               <name>Example Maven Repo</name>
               <layout>default</layout>
               <url>http://example.org/maven2</url>
             </repository>
           </repositories>
           <licenses>
             <license>
               <name>Apache License, Version 2.0</name>
               <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
               <distribution>repo</distribution>
               <comments>A business-friendly OSS license</comments>
             </license>
             <license>
               <name>MIT license</name>
               <url>https://opensource.org/licenses/MIT</url>
             </license>
           </licenses>
         </project>"""
         result = self.maven_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'Maven Default Project',
             'identifier': 'com.mycompany.app',
             'version': '1.2.3',
             'license': [
                 'https://www.apache.org/licenses/LICENSE-2.0.txt',
                 'https://opensource.org/licenses/MIT',
             ],
             'codeRepository': [
                 'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
                 'http://example.org/maven2/com/mycompany/app/my-app',
             ]
         })
 
     def test_compute_metadata_pkginfo(self):
         raw_content = (b"""\
 Metadata-Version: 2.1
 Name: swh.core
 Version: 0.0.49
 Summary: Software Heritage core utilities
 Home-page: https://forge.softwareheritage.org/diffusion/DCORE/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-core
 Description: swh-core
         ========
        \x20
         core library for swh's modules:
         - config parser
         - hash computations
         - serialization
         - logging mechanism
        \x20
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Description-Content-Type: text/markdown
 Provides-Extra: testing
 """) # noqa
         result = self.pkginfo_mapping.translate(raw_content)
         self.assertCountEqual(result['description'], [
             'Software Heritage core utilities',  # note the comma here
             'swh-core\n'
             '========\n'
             '\n'
             "core library for swh's modules:\n"
             '- config parser\n'
             '- hash computations\n'
             '- serialization\n'
             '- logging mechanism\n'
             ''],
             result)
         del result['description']
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'url': 'https://forge.softwareheritage.org/diffusion/DCORE/',
             'name': 'swh.core',
             'author': [{
                 'type': 'Person',
                 'name': 'Software Heritage developers',
                 'email': 'swh-devel@inria.fr',
             }],
             'version': '0.0.49',
         })
 
     def test_compute_metadata_pkginfo_utf8(self):
         raw_content = (b'''\
 Metadata-Version: 1.1
 Name: snowpyt
 Description-Content-Type: UNKNOWN
 Description: foo
         Hydrology N\xc2\xb083
 ''') # noqa
         result = self.pkginfo_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'snowpyt',
             'description': 'foo\nHydrology N°83',
         })
 
     def test_compute_metadata_pkginfo_license(self):
         raw_content = (b"""\
 Metadata-Version: 2.1
 Name: foo
 License: MIT
 """) # noqa
         result = self.pkginfo_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'foo',
             'license': 'MIT',
         })
 
     def test_gemspec_base(self):
         raw_content = b"""
 Gem::Specification.new do |s|
   s.name        = 'example'
   s.version     = '0.1.0'
   s.licenses    = ['MIT']
   s.summary     = "This is an example!"
   s.description = "Much longer explanation of the example!"
   s.authors     = ["Ruby Coder"]
   s.email       = 'rubycoder@example.com'
   s.files       = ["lib/example.rb"]
   s.homepage    = 'https://rubygems.org/gems/example'
   s.metadata    = { "source_code_uri" => "https://github.com/example/example" }
 end"""
         result = self.gemspec_mapping.translate(raw_content)
         self.assertCountEqual(result.pop('description'), [
             "This is an example!",
             "Much longer explanation of the example!"
         ])
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'author': ['Ruby Coder'],
             'name': 'example',
             'license': 'https://spdx.org/licenses/MIT',
             'codeRepository': 'https://rubygems.org/gems/example',
             'email': 'rubycoder@example.com',
             'version': '0.1.0',
         })
 
     def test_gemspec_two_author_fields(self):
         raw_content = b"""
 Gem::Specification.new do |s|
   s.authors     = ["Ruby Coder1"]
   s.author      = "Ruby Coder2"
 end"""
         result = self.gemspec_mapping.translate(raw_content)
         self.assertCountEqual(result.pop('author'), [
             'Ruby Coder1', 'Ruby Coder2'])
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
         })
 
     def test_gemspec_invalid_author(self):
         raw_content = b"""
 Gem::Specification.new do |s|
   s.author      = ["Ruby Coder"]
 end"""
         result = self.gemspec_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
         })
         raw_content = b"""
 Gem::Specification.new do |s|
   s.author      = "Ruby Coder1",
 end"""
         result = self.gemspec_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
         })
         raw_content = b"""
 Gem::Specification.new do |s|
   s.authors     = ["Ruby Coder1", ["Ruby Coder2"]]
 end"""
         result = self.gemspec_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'author': ['Ruby Coder1'],
         })
 
     def test_gemspec_alternative_header(self):
         raw_content = b"""
 require './lib/version'
 
 Gem::Specification.new { |s|
   s.name = 'rb-system-with-aliases'
   s.summary = 'execute system commands with aliases'
 }
 """
         result = self.gemspec_mapping.translate(raw_content)
         self.assertEqual(result, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'type': 'SoftwareSourceCode',
             'name': 'rb-system-with-aliases',
             'description': 'execute system commands with aliases',
         })
 
     @settings(suppress_health_check=[HealthCheck.too_slow])
     @given(json_document_strategy(
         keys=list(MAPPINGS['NpmMapping'].mapping)))
     def test_npm_adversarial(self, doc):
         raw = json.dumps(doc).encode()
         self.npm_mapping.translate(raw)
 
     @settings(suppress_health_check=[HealthCheck.too_slow])
-    @given(json_document_strategy(keys=CODEMETA_KEYS))
+    @given(json_document_strategy(keys=CODEMETA_TERMS))
     def test_codemeta_adversarial(self, doc):
         raw = json.dumps(doc).encode()
         self.codemeta_mapping.translate(raw)
 
     @settings(suppress_health_check=[HealthCheck.too_slow])
     @given(json_document_strategy(
         keys=list(MAPPINGS['MavenMapping'].mapping)))
     def test_maven_adversarial(self, doc):
         raw = xmltodict.unparse({'project': doc}, pretty=True)
         self.maven_mapping.translate(raw)
 
     @settings(suppress_health_check=[HealthCheck.too_slow])
     @given(strategies.dictionaries(
         # keys
         strategies.one_of(
             strategies.text(),
             *map(strategies.just, MAPPINGS['GemspecMapping'].mapping)
         ),
         # values
         strategies.recursive(
             strategies.characters(),
             lambda children: strategies.lists(children, 1)
         )
     ))
     def test_gemspec_adversarial(self, doc):
         parts = [b'Gem::Specification.new do |s|\n']
         for (k, v) in doc.items():
             parts.append('  s.{} = {}\n'.format(k, repr(v)).encode())
         parts.append(b'end\n')
         self.gemspec_mapping.translate(b''.join(parts))
 
     def test_revision_metadata_indexer(self):
         metadata_indexer = RevisionMetadataIndexer(
             config=REVISION_METADATA_CONFIG)
         fill_obj_storage(metadata_indexer.objstorage)
         fill_storage(metadata_indexer.storage)
 
         tool = metadata_indexer.idx_storage.indexer_configuration_get(
             {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()})
         assert tool is not None
 
         metadata_indexer.idx_storage.content_metadata_add([{
             'indexer_configuration_id': tool['id'],
             'id': b'cde',
             'translated_metadata': YARN_PARSER_METADATA,
         }])
 
         sha1_gits = [
             hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
         ]
         metadata_indexer.run(sha1_gits, 'update-dups')
 
         results = list(metadata_indexer.idx_storage.revision_metadata_get(
             sha1_gits))
 
         expected_results = [{
             'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
             'tool': TRANSLATOR_TOOL,
             'translated_metadata': YARN_PARSER_METADATA,
             'mappings': ['npm'],
         }]
 
         for result in results:
             del result['tool']['id']
 
         # then
         self.assertEqual(expected_results, results)