diff --git a/requirements.txt b/requirements.txt
index dbade70..87ecc1f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 vcversioner
 pygments
 click
 chardet
 file_magic
+pyld
diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
new file mode 100644
index 0000000..267b7bf
--- /dev/null
+++ b/swh/indexer/codemeta.py
@@ -0,0 +1,95 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import csv
+import json
+import os.path
+
+import swh.indexer
+from pyld import jsonld
+
+# Data files shipped inside the swh.indexer package.
+_DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data')
+CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv')
+CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, 'codemeta', 'codemeta.jsonld')
+
+# JSON text is UTF-8 (RFC 8259); do not depend on the locale's encoding.
+with open(CODEMETA_CONTEXT_PATH, encoding='utf-8') as fd:
+    CODEMETA_CONTEXT = json.load(fd)
+
+# URL of the CodeMeta 2.0 JSON-LD context (served from the local copy).
+CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0'
+# IRI prefix of CodeMeta-specific terms (`codemeta:` in the context).
+CODEMETA_URI = 'https://codemeta.github.io/terms/'
+
+# CodeMeta properties that we cannot properly represent.
+PROPERTY_BLACKLIST = {
+    'https://codemeta.github.io/terms/softwareRequirements',
+    'https://codemeta.github.io/terms/softwareSuggestions',
+    }
+
+
+def _read_crosstable(fd):
+    """Parse the CodeMeta crosswalk table, given as CSV lines in `fd`.
+
+    Returns a dict {data source: {local name: canonical CodeMeta URI}},
+    leaving out the properties listed in PROPERTY_BLACKLIST.
+    Raises ValueError if `fd` does not even contain a header row."""
+    reader = csv.reader(fd)
+    try:
+        header = next(reader)
+    except StopIteration:
+        raise ValueError('empty file') from None
+    data_sources = set(header) - {'Parent Type', 'Property',
+                                  'Type', 'Description'}
+    assert 'codemeta-V1' in data_sources
+    codemeta_translation = {data_source: {} for data_source in data_sources}
+
+    for line in reader:  # For each canonical name
+        if not line:
+            continue  # csv.reader yields [] for blank lines; skip them
+        canonical_name = CODEMETA_URI + dict(zip(header, line))['Property']
+        if canonical_name in PROPERTY_BLACKLIST:
+            continue
+        for (col, value) in zip(header, line):  # For each cell in the row
+            if col not in data_sources:
+                continue  # parentType/property/type/description column
+            for local_name in filter(None, map(str.strip, value.split('/'))):
+                codemeta_translation[col][local_name] = canonical_name
+    return codemeta_translation
+
+
+with open(CROSSWALK_TABLE_PATH, newline='') as fd:  # newline='': csv docs
+    CROSSWALK_TABLE = _read_crosstable(fd)  # parsed once, at import time
+
+
+def _document_loader(url, options=None):
+    """Document loader for pyld, serving the bundled CodeMeta context.
+
+    Returns the local codemeta.jsonld content for CODEMETA_CONTEXT_URL
+    instead of fetching it from the Internet every single time; any
+    other URL raises an Exception. `options` is accepted and ignored,
+    as newer pyld releases call loaders with a second `options` argument.
+    """
+    if url == CODEMETA_CONTEXT_URL:
+        return {'contextUrl': None, 'documentUrl': url,
+                'document': CODEMETA_CONTEXT}
+    elif url == CODEMETA_URI:
+        raise Exception('{} is CodeMeta\'s URI, use {} as context url'.format(
+            CODEMETA_URI, CODEMETA_CONTEXT_URL))
+    else:
+        raise Exception(url)
+
+
+def compact(doc):
+    """Compact `doc` with pyld against the CodeMeta context, loaded locally."""
+    loader_options = {'documentLoader': _document_loader}
+    return jsonld.compact(doc, CODEMETA_CONTEXT_URL, options=loader_options)
+
+
+def expand(doc):
+    """Expand `doc` with pyld, resolving the CodeMeta context locally."""
+    loader_options = {'documentLoader': _document_loader}
+    return jsonld.expand(doc, options=loader_options)
diff --git a/swh/indexer/data/codemeta/CITATION b/swh/indexer/data/codemeta/CITATION
new file mode 100644
index 0000000..9f1a546
--- /dev/null
+++ b/swh/indexer/data/codemeta/CITATION
@@ -0,0 +1,2 @@
+Matthew B. Jones, Carl Boettiger, Abby Cabunoc Mayes, Arfon Smith, Peter Slaughter, Kyle Niemeyer, Yolanda Gil, Martin Fenner, Krzysztof Nowak, Mark Hahnel, Luke Coy, Alice Allen, Mercè Crosas, Ashley Sands, Neil Chue Hong, Patricia Cruse, Daniel S. Katz, Carole Goble. 2017. CodeMeta: an exchange schema for software metadata. Version 2.0. KNB Data Repository. doi:10.5063/schema/codemeta-2.0
+swh:1:dir:39c509fd2002f9e531fb4b3a321ceb5e6994e54a;origin=https://github.com/codemeta/codemeta
diff --git a/swh/indexer/data/codemeta/codemeta.jsonld b/swh/indexer/data/codemeta/codemeta.jsonld
new file mode 100644
index 0000000..ecba88b
--- /dev/null
+++ b/swh/indexer/data/codemeta/codemeta.jsonld
@@ -0,0 +1,80 @@
+{
+  "@context": {
+      "type": "@type",
+      "id": "@id",
+      "schema":"http://schema.org/",
+      "codemeta": "https://codemeta.github.io/terms/",
+      "Organization": {"@id": "schema:Organization"},
+      "Person": {"@id": "schema:Person"},
+      "SoftwareSourceCode": {"@id": "schema:SoftwareSourceCode"},
+      "SoftwareApplication": {"@id": "schema:SoftwareApplication"},
+      "Text": {"@id": "schema:Text"},
+      "URL": {"@id": "schema:URL"},
+      "address": { "@id": "schema:address"},
+      "affiliation": { "@id": "schema:affiliation"},
+      "applicationCategory": { "@id": "schema:applicationCategory", "@type": "@id"},
+      "applicationSubCategory": { "@id": "schema:applicationSubCategory", "@type": "@id"},
+      "citation": { "@id": "schema:citation"},
+      "codeRepository": { "@id": "schema:codeRepository", "@type": "@id"},
+      "contributor": { "@id": "schema:contributor"},
+      "copyrightHolder": { "@id": "schema:copyrightHolder"},
+      "copyrightYear": { "@id": "schema:copyrightYear"},
+      "creator": { "@id": "schema:creator"},
+      "dateCreated": {"@id": "schema:dateCreated", "@type": "schema:Date" },
+      "dateModified":  {"@id": "schema:dateModified", "@type": "schema:Date" },
+      "datePublished":  {"@id": "schema:datePublished", "@type": "schema:Date" },
+      "description": { "@id": "schema:description"},
+      "downloadUrl": { "@id": "schema:downloadUrl", "@type": "@id"},
+      "email": { "@id": "schema:email"},
+      "editor": { "@id": "schema:editor"},
+      "encoding": { "@id": "schema:encoding"},
+      "familyName": { "@id": "schema:familyName"},
+      "fileFormat": { "@id": "schema:fileFormat", "@type": "@id"},
+      "fileSize": { "@id": "schema:fileSize"},
+      "funder": { "@id": "schema:funder"},
+      "givenName": { "@id": "schema:givenName"},
+      "hasPart": { "@id": "schema:hasPart" },
+      "identifier": { "@id": "schema:identifier", "@type": "@id"},
+      "installUrl": { "@id": "schema:installUrl", "@type": "@id"},
+      "isAccessibleForFree": { "@id": "schema:isAccessibleForFree"},
+      "isPartOf":  { "@id": "schema:isPartOf"},
+      "keywords": { "@id": "schema:keywords"},
+      "license": { "@id": "schema:license", "@type": "@id"},
+      "memoryRequirements": { "@id": "schema:memoryRequirements", "@type": "@id"},
+      "name": { "@id": "schema:name"},
+      "operatingSystem": { "@id": "schema:operatingSystem"},
+      "permissions": { "@id": "schema:permissions"},
+      "position": { "@id": "schema:position"},
+      "processorRequirements": { "@id": "schema:processorRequirements"},
+      "producer": { "@id": "schema:producer"},
+      "programmingLanguage": { "@id": "schema:programmingLanguage"},
+      "provider": { "@id": "schema:provider"},
+      "publisher": { "@id": "schema:publisher"},
+      "relatedLink": { "@id": "schema:relatedLink", "@type": "@id"},
+      "releaseNotes": { "@id": "schema:releaseNotes", "@type": "@id"},
+      "runtimePlatform": { "@id": "schema:runtimePlatform"},
+      "sameAs": { "@id": "schema:sameAs", "@type": "@id"},
+      "softwareHelp": { "@id": "schema:softwareHelp"},
+      "softwareRequirements": { "@id": "schema:softwareRequirements", "@type": "@id"},
+      "softwareVersion": { "@id": "schema:softwareVersion"},
+      "sponsor": { "@id": "schema:sponsor"},
+      "storageRequirements": { "@id": "schema:storageRequirements", "@type": "@id"},
+      "supportingData": { "@id": "schema:supportingData"},
+      "targetProduct": { "@id": "schema:targetProduct"},
+      "url": { "@id": "schema:url", "@type": "@id"},
+      "version": { "@id": "schema:version"},
+        
+      "author": { "@id": "schema:author", "@container": "@list" },
+      
+      "softwareSuggestions": { "@id": "codemeta:softwareSuggestions", "@type": "@id"},
+      "contIntegration": { "@id": "codemeta:contIntegration", "@type": "@id"},
+      "buildInstructions": { "@id": "codemeta:buildInstructions", "@type": "@id"},
+      "developmentStatus": { "@id": "codemeta:developmentStatus", "@type": "@id"},
+      "embargoDate": { "@id":"codemeta:embargoDate", "@type": "schema:Date" },
+      "funding": { "@id": "codemeta:funding" },
+      "readme": { "@id":"codemeta:readme", "@type": "@id" },
+      "issueTracker": { "@id":"codemeta:issueTracker", "@type": "@id" },
+      "referencePublication": { "@id": "codemeta:referencePublication", "@type": "@id"},
+      "maintainer": { "@id": "codemeta:maintainer" }
+  }
+}
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
index 933716b..b625b39 100644
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -1,334 +1,334 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 import click
 import logging
 
 from swh.indexer.indexer import ContentIndexer, RevisionIndexer, OriginIndexer
 from swh.indexer.metadata_dictionary import MAPPINGS
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.indexer.storage import INDEXER_CFG_KEY
 
 from swh.model import hashutil
 
 
 class ContentMetadataIndexer(ContentIndexer):
     """Content-level indexer
 
     This indexer is in charge of:
 
     - filtering out content already indexed in content_metadata
     - reading content from objstorage with the content's id sha1
     - computing translated_metadata by given context
     - using the metadata_dictionary as the 'swh-metadata-translator' tool
     - store result in content_metadata table
 
     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'
 
     def __init__(self, tool, config):
         # twisted way to use the exact same config of RevisionMetadataIndexer
         # object that uses internally ContentMetadataIndexer
         self.config = config
         self.config['tools'] = tool
         super().__init__()
 
     def filter(self, ids):
         """Filter out known sha1s and return only missing ones.
         """
         yield from self.idx_storage.content_metadata_missing((
             {
                 'id': sha1,
                 'indexer_configuration_id': self.tool['id'],
             } for sha1 in ids
         ))
 
     def index(self, id, data):
         """Index sha1s' content and store result.
 
         Args:
             id (bytes): content's identifier
             data (bytes): raw content in bytes
 
         Returns:
             dict: dictionary representing a content_metadata. If the
             translation wasn't successful the translated_metadata keys will
             be returned as None
 
         """
         result = {
             'id': id,
             'indexer_configuration_id': self.tool['id'],
             'translated_metadata': None
         }
         try:
             mapping_name = self.tool['tool_configuration']['context']
             result['translated_metadata'] = MAPPINGS[mapping_name] \
                 .translate(data)
             # a twisted way to keep result with indexer object for get_results
             self.results.append(result)
         except Exception:
             self.log.exception(
                 "Problem during tool retrieval of metadata translation")
         return result
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Args:
             results ([dict]): list of content_metadata, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - translated_metadata (jsonb): detected metadata
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         self.idx_storage.content_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
     def get_results(self):
         """can be called only if run method was called before
 
         Returns:
             list: list of content_metadata entries calculated by
                   current indexer
 
         """
         return self.results
 
 
 class RevisionMetadataIndexer(RevisionIndexer):
     """Revision-level indexer
 
     This indexer is in charge of:
 
     - filtering revisions already indexed in revision_metadata table with
       defined computation tool
     - retrieve all entry_files in root directory
     - use metadata_detector for file_names containing metadata
     - compute metadata translation if necessary and possible (depends on tool)
     - send sha1s to content indexing if possible
     - store the results for revision
 
     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'
 
     ADDITIONAL_CONFIG = {
         'tools': ('dict', {
             'name': 'swh-metadata-detector',
             'version': '0.0.2',
             'configuration': {
                 'type': 'local',
                 'context': ['NpmMapping', 'CodemetaMapping']
             },
         }),
     }
 
     ContentMetadataIndexer = ContentMetadataIndexer
 
     def prepare(self):
         super().prepare()
         self.tool = self.tools[0]
 
     def filter(self, sha1_gits):
         """Filter out known sha1s and return only missing ones.
 
         """
         yield from self.idx_storage.revision_metadata_missing((
             {
                 'id': sha1_git,
                 'indexer_configuration_id': self.tool['id'],
             } for sha1_git in sha1_gits
         ))
 
     def index(self, rev):
         """Index rev by processing it and organizing result.
 
         use metadata_detector to iterate on filenames
 
         - if one filename detected -> sends file to content indexer
         - if multiple file detected -> translation needed at revision level
 
         Args:
           rev (bytes): revision artifact from storage
 
         Returns:
             dict: dictionary representing a revision_metadata, with keys:
 
                 - id (str): rev's identifier (sha1_git)
                 - indexer_configuration_id (bytes): tool used
-                - translated_metadata (bytes): dict of retrieved metadata
+                - translated_metadata: dict of retrieved metadata
 
         """
         try:
             result = {
                 'id': rev['id'].decode(),
                 'indexer_configuration_id': self.tool['id'],
                 'translated_metadata': None
             }
 
             root_dir = rev['directory']
             dir_ls = self.storage.directory_ls(root_dir, recursive=False)
             files = [entry for entry in dir_ls if entry['type'] == 'file']
             detected_files = detect_metadata(files)
             result['translated_metadata'] = self.translate_revision_metadata(
-                                                                detected_files)
+                    detected_files)
         except Exception as e:
             self.log.exception(
                 'Problem when indexing rev: %r', e)
         return result
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Args:
             results ([dict]): list of content_mimetype, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - mimetype (bytes): mimetype in bytes
               - encoding (bytes): encoding in bytes
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         # TODO: add functions in storage to keep data in revision_metadata
         self.idx_storage.revision_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
     def translate_revision_metadata(self, detected_files):
         """
         Determine plan of action to translate metadata when containing
         one or multiple detected files:
 
         Args:
             detected_files (dict): dictionary mapping context names (e.g.,
               "npm", "authors") to list of sha1
 
         Returns:
             dict: dict with translated metadata according to the CodeMeta
             vocabulary
 
         """
         translated_metadata = []
         tool = {
                 'name': 'swh-metadata-translator',
                 'version': '0.0.2',
                 'configuration': {
                     'type': 'local',
                     'context': None
                 },
             }
         # TODO: iterate on each context, on each file
         # -> get raw_contents
         # -> translate each content
         config = {
             INDEXER_CFG_KEY: self.idx_storage,
             'objstorage': self.objstorage
         }
         for context in detected_files.keys():
             tool['configuration']['context'] = context
             c_metadata_indexer = self.ContentMetadataIndexer(tool, config)
             # sha1s that are in content_metadata table
             sha1s_in_storage = []
             metadata_generator = self.idx_storage.content_metadata_get(
                 detected_files[context])
             for c in metadata_generator:
                 # extracting translated_metadata
                 sha1 = c['id']
                 sha1s_in_storage.append(sha1)
                 local_metadata = c['translated_metadata']
                 # local metadata is aggregated
                 if local_metadata:
                     translated_metadata.append(local_metadata)
 
             sha1s_filtered = [item for item in detected_files[context]
                               if item not in sha1s_in_storage]
 
             if sha1s_filtered:
                 # schedule indexation of content
                 try:
                     c_metadata_indexer.run(sha1s_filtered,
                                            policy_update='ignore-dups')
                     # on the fly possibility:
                     results = c_metadata_indexer.get_results()
 
                     for result in results:
                         local_metadata = result['translated_metadata']
                         translated_metadata.append(local_metadata)
 
                 except Exception as e:
                     self.log.warning("""Exception while indexing content""", e)
 
         # transform translated_metadata into min set with swh-metadata-detector
         min_metadata = extract_minimal_metadata_dict(translated_metadata)
         return min_metadata
 
 
 class OriginMetadataIndexer(OriginIndexer):
     def filter(self, ids):
         return ids
 
     def run(self, revisions_metadata, policy_update, *, origin_head):
         """Expected to be called with the result of RevisionMetadataIndexer
         as first argument; ie. not a list of ids as other indexers would.
 
         Args:
 
             * `revisions_metadata` (List[dict]): contains metadata from
               revisions, along with the respective revision ids. It is
               passed by RevisionMetadataIndexer via a Celery chain
               triggered by OriginIndexer.next_step.
             * `policy_update`: `'ignore-dups'` or `'update-dups'`
             * `origin_head` (dict): {str(origin_id): rev_id.encode()}
               keys `origin_id` and `revision_id`, which is the result
               of OriginHeadIndexer.
         """
         origin_head_map = {int(origin_id): rev_id
                            for (origin_id, rev_id) in origin_head.items()}
 
         # Fix up the argument order. revisions_metadata has to be the
         # first argument because of celery.chain; the next line calls
         # run() with the usual order, ie. origin ids first.
         return super().run(ids=list(origin_head_map),
                            policy_update=policy_update,
                            revisions_metadata=revisions_metadata,
                            origin_head_map=origin_head_map)
 
     def index(self, origin, *, revisions_metadata, origin_head_map):
         # Get the last revision of the origin.
         revision_id = origin_head_map[origin['id']]
 
         # Get the metadata of that revision, and return it
         for revision_metadata in revisions_metadata:
             if revision_metadata['id'] == revision_id:
                 return {
                         'origin_id': origin['id'],
                         'metadata': revision_metadata['translated_metadata'],
                         'from_revision': revision_id,
                         'indexer_configuration_id':
                         revision_metadata['indexer_configuration_id'],
                         }
 
         raise KeyError('%r not in %r' %
                        (revision_id, [r['id'] for r in revisions_metadata]))
 
     def persist_index_computations(self, results, policy_update):
         self.idx_storage.origin_intrinsic_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
 
 @click.command()
 @click.option('--revs', '-i',
               help='Default sha1_git to lookup', multiple=True)
 def main(revs):
     _git_sha1s = list(map(hashutil.hash_to_bytes, revs))
     rev_metadata_indexer = RevisionMetadataIndexer()
     rev_metadata_indexer.run(_git_sha1s, 'update-dups')
 
 
 if __name__ == '__main__':
     logging.basicConfig(level=logging.INFO)
     main()
diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py
index d26a7ef..00bef4a 100644
--- a/swh/indexer/metadata_detector.py
+++ b/swh/indexer/metadata_detector.py
@@ -1,65 +1,58 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-
+from swh.indexer.codemeta import compact, expand, CODEMETA_URI
 from swh.indexer.metadata_dictionary import MAPPINGS
 
 
 def detect_metadata(files):
     """
     Detects files potentially containing metadata
     Args:
         - file_entries (list): list of files
 
     Returns:
         - empty list if nothing was found
         - dictionary {mapping_filenames[name]:f['sha1']}
     """
     results = {}
     for (mapping_name, mapping) in MAPPINGS.items():
         matches = mapping.detect_metadata_files(files)
         if matches:
             results[mapping_name] = matches
     return results
 
 
+# Properties kept by extract_minimal_metadata_dict, by CodeMeta name.
+_MINIMAL_PROPERTY_SET = {
+    "developmentStatus", "version", "operatingSystem", "description",
+    "keywords", "issueTracker", "name", "author", "relatedLink", "url",
+    "license", "maintainer", "email", "identifier", "codeRepository"}
+# The same properties, as IRIs prefixed with CODEMETA_URI.
+MINIMAL_METADATA_SET = {CODEMETA_URI+prop for prop in _MINIMAL_PROPERTY_SET}
+
+
 def extract_minimal_metadata_dict(metadata_list):
     """
     Every item in the metadata_list is a dict of translated_metadata in the
     CodeMeta vocabulary
     we wish to extract a minimal set of terms and keep all values corresponding
     to this term without duplication
     Args:
         - metadata_list (list): list of dicts of translated_metadata
 
     Returns:
         - minimal_dict (dict): one dict with selected values of metadata
     """
-    minimal_dict = {
-        "developmentStatus": [],
-        "version": [],
-        "operatingSystem": [],
-        "description": [],
-        "keywords": [],
-        "issueTracker": [],
-        "name": [],
-        "author": [],
-        "relatedLink": [],
-        "url": [],
-        "license": [],
-        "maintainer": [],
-        "email": [],
-        "softwareRequirements": [],
-        "identifier": [],
-        "codeRepository": []
-    }
-    for term in minimal_dict.keys():
-        for metadata_item in metadata_list:
-            if term in metadata_item:
-                if not metadata_item[term] in minimal_dict[term]:
-                    minimal_dict[term].append(metadata_item[term])
-        if not minimal_dict[term]:
-            minimal_dict[term] = None
-    return minimal_dict
+    collected = {}  # expanded term IRI -> distinct values, first-seen order
+    for translated in metadata_list:
+        for node in expand(translated):
+            for (iri, value) in node.items():
+                if iri not in MINIMAL_METADATA_SET:
+                    continue
+                known_values = collected.setdefault(iri, [])
+                if value not in known_values:
+                    known_values.append(value)
+    return compact(collected)
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
index 4266001..bf704c8 100644
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -1,214 +1,177 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import abc
-import csv
 import json
-import os.path
 import logging
 
-import swh.indexer
-
-CROSSWALK_TABLE_PATH = os.path.join(os.path.dirname(swh.indexer.__file__),
-                                    'data', 'codemeta', 'crosswalk.csv')
-
-
-def read_crosstable(fd):
-    reader = csv.reader(fd)
-    try:
-        header = next(reader)
-    except StopIteration:
-        raise ValueError('empty file')
-
-    data_sources = set(header) - {'Parent Type', 'Property',
-                                  'Type', 'Description'}
-    assert 'codemeta-V1' in data_sources
-
-    codemeta_translation = {data_source: {} for data_source in data_sources}
-
-    for line in reader:  # For each canonical name
-        canonical_name = dict(zip(header, line))['Property']
-        for (col, value) in zip(header, line):  # For each cell in the row
-            if col in data_sources:
-                # If that's not the parentType/property/type/description
-                for local_name in value.split('/'):
-                    # For each of the data source's properties that maps
-                    # to this canonical name
-                    if local_name.strip():
-                        codemeta_translation[col][local_name.strip()] = \
-                                canonical_name
-
-    return codemeta_translation
-
-
-with open(CROSSWALK_TABLE_PATH) as fd:
-    CROSSWALK_TABLE = read_crosstable(fd)
+from swh.indexer.codemeta import CROSSWALK_TABLE, compact
 
 
 MAPPINGS = {}
 
 
 def register_mapping(cls):
     MAPPINGS[cls.__name__] = cls()
     return cls
 
 
 class BaseMapping(metaclass=abc.ABCMeta):
     """Base class for mappings to inherit from
 
     To implement a new mapping:
 
     - inherit this class
     - override translate function
     """
     def __init__(self):
         self.log = logging.getLogger('%s.%s' % (
             self.__class__.__module__,
             self.__class__.__name__))
 
     @abc.abstractmethod
     def detect_metadata_files(self, files):
         """
         Detects files potentially containing metadata
         Args:
             - file_entries (list): list of files
 
         Returns:
             - empty list if nothing was found
             - list of sha1 otherwise
         """
         pass
 
     @abc.abstractmethod
     def translate(self, file_content):
         pass
 
+    def normalize_translation(self, metadata):
+        return compact(metadata)  # JSON-LD compaction in CodeMeta's context
+
 
 class DictMapping(BaseMapping):
     """Base class for mappings that take as input a file that is mostly
     a key-value store (eg. a shallow JSON dict)."""
 
     @property
     @abc.abstractmethod
     def mapping(self):
         """A translation dict to map dict keys into a canonical name."""
         pass
 
     def translate_dict(self, content_dict):
         """
         Translates content  by parsing content from a dict object
         and translating with the appropriate mapping
 
         Args:
             content_dict (dict)
 
         Returns:
             dict: translated metadata in json-friendly form needed for
                   the indexer
 
         """
         translated_metadata = {}
-        default = 'other'
-        translated_metadata['other'] = {}
-        try:
-            for k, v in content_dict.items():
-                try:
-                    term = self.mapping.get(k, default)
-                    if term not in translated_metadata:
-                        translated_metadata[term] = v
-                        continue
-                    if isinstance(translated_metadata[term], str):
-                        in_value = translated_metadata[term]
-                        translated_metadata[term] = [in_value, v]
-                        continue
-                    if isinstance(translated_metadata[term], list):
-                        translated_metadata[term].append(v)
-                        continue
-                    if isinstance(translated_metadata[term], dict):
-                        translated_metadata[term][k] = v
-                        continue
-                except KeyError:
-                    self.log.exception(
-                        "Problem during item mapping")
-                    continue
-        except Exception:
-            raise
-            return None
-        return translated_metadata
+        for k, v in content_dict.items():
+            # First, check if there is a specific translation method for
+            # this key; a "dict" key must not resolve to translate_dict().
+            translation_method = None
+            if k != 'dict':
+                translation_method = getattr(self, 'translate_' + k, None)
+            if translation_method:
+                translation_method(translated_metadata, v)
+            elif k in self.mapping:
+                # No method, but the key is known from the crosswalk
+                # table: apply its normalization method (if any) to the
+                # value, and store it under the canonical name.
+                normalization_method = getattr(self, 'normalize_' + k, None)
+                if normalization_method:
+                    v = normalization_method(v)
+                translated_metadata[self.mapping[k]] = v
+        # Let the mapping post-process the result (by default: compact it).
+        return self.normalize_translation(translated_metadata)
 
 
 class JsonMapping(DictMapping):
     """Base class for all mappings that use a JSON file as input."""
 
     @property
     @abc.abstractmethod
     def filename(self):
         """The .json file to extract metadata from."""
         pass
 
     def detect_metadata_files(self, file_entries):
         for entry in file_entries:
             if entry['name'] == self.filename:
                 return [entry['sha1']]
         return []
 
     def translate(self, raw_content):
         """
         Translates content by parsing content from a bytestring containing
         json data and translating with the appropriate mapping
 
         Args:
             raw_content: bytes
 
         Returns:
             dict: translated metadata in json-friendly form needed for
                   the indexer
 
         """
         try:
             raw_content = raw_content.decode()
         except UnicodeDecodeError:
             self.log.warning('Error unidecoding %r', raw_content)
             return
         try:
             content_dict = json.loads(raw_content)
         except json.JSONDecodeError:
             self.log.warning('Error unjsoning %r' % raw_content)
             return
         return self.translate_dict(content_dict)
 
 
 @register_mapping
 class NpmMapping(JsonMapping):
     """
     dedicated class for NPM (package.json) mapping and translation
     """
     mapping = CROSSWALK_TABLE['NodeJS']
     filename = b'package.json'
 
+    def normalize_repository(self, d):  # str shorthand or {type, url}
+        return d if isinstance(d, str) else '{type}+{url}'.format(**d)
+
+    def normalize_bugs(self, d):  # plain URL string or {url, email}
+        return d if isinstance(d, str) else '{url}'.format(**d)
+
 
 @register_mapping
 class CodemetaMapping(JsonMapping):
     """
     dedicated class for CodeMeta (codemeta.json) mapping and translation
     """
     mapping = CROSSWALK_TABLE['codemeta-V1']
     filename = b'codemeta.json'
 
 
 def main():
     raw_content = """{"name": "test_name", "unknown_term": "ut"}"""
     raw_content1 = b"""{"name": "test_name",
                         "unknown_term": "ut",
                         "prerequisites" :"packageXYZ"}"""
     result = MAPPINGS["NpmMapping"].translate(raw_content)
     result1 = MAPPINGS["MavenMapping"].translate(raw_content1)
 
     print(result)
     print(result1)
 
 
 if __name__ == "__main__":
     main()
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index 4dae7a5..e6e5734 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,378 +1,319 @@
 # Copyright (C) 2017-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 import logging
 
 from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.indexer.metadata import ContentMetadataIndexer
 from swh.indexer.metadata import RevisionMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
 from swh.indexer.tests.test_utils import MockIndexerStorage
 
 
 class ContentMetadataTestIndexer(ContentMetadataIndexer):
     """Content metadata indexer wired to in-memory mocks, so that the
        indexing tests need no external service.
     """
     def prepare(self):
         """Install the mock storages and register the configured tools."""
         self.results = []
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         registered_tools = self.register_tools(self.config['tools'])
         self.tools = registered_tools
         self.tool = registered_tools[0]
 
 
 class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
     """Revision metadata indexer wired to in-memory mocks, so that the
        indexing tests need no external service.
     """
 
     ContentMetadataIndexer = ContentMetadataTestIndexer
 
     def prepare(self):
         """Set a fixed configuration, install the mocks, register tools."""
         storage_config = {
             'cls': 'remote',
             'args': {
                 'url': 'http://localhost:9999',
             },
         }
         tool_config = {
             'name': 'swh-metadata-detector',
             'version': '0.0.2',
             'configuration': {
                 'type': 'local',
                 'context': 'NpmMapping',
             },
         }
         self.config = {'storage': storage_config, 'tools': tool_config}
 
         self.results = []
         self.storage = MockStorage()
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         registered_tools = self.register_tools(self.config['tools'])
         self.tools = registered_tools
         self.tool = registered_tools[0]
 
 
 class Metadata(unittest.TestCase):
     """
     Tests metadata_mock_tool tool for Metadata detection
     """
     def setUp(self):
         """
         Show full diffs on assertion failure, declare the tool handed to
         the content indexer, and empty the shared mock storage log.
         """
         MockIndexerStorage.added_data = []
         self.maxDiff = None
         tool_configuration = {
             'type': 'local',
             'context': 'NpmMapping',
         }
         self.content_tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.2',
             'configuration': tool_configuration,
         }
 
     def test_crosstable(self):
+        # Values are full CodeMeta term URIs.  npm's dependency fields are
+        # no longer expected: they map to softwareRequirements and
+        # softwareSuggestions, which codemeta.py blacklists.
         self.assertEqual(CROSSWALK_TABLE['NodeJS'], {
-            'repository': 'codeRepository',
-            'os': 'operatingSystem',
-            'cpu': 'processorRequirements',
-            'engines': 'processorRequirements',
-            'dependencies': 'softwareRequirements',
-            'bundleDependencies': 'softwareRequirements',
-            'bundledDependencies': 'softwareRequirements',
-            'peerDependencies': 'softwareRequirements',
-            'author': 'creator',
-            'author.email': 'email',
-            'author.name': 'name',
-            'contributor': 'contributor',
-            'keywords': 'keywords',
-            'license': 'license',
-            'version': 'version',
-            'description': 'description',
-            'name': 'name',
-            'devDependencies': 'softwareSuggestions',
-            'optionalDependencies': 'softwareSuggestions',
-            'bugs': 'issueTracker',
-            'homepage': 'url'
+            'repository': 'https://codemeta.github.io/terms/codeRepository',
+            'os': 'https://codemeta.github.io/terms/operatingSystem',
+            'cpu': 'https://codemeta.github.io/terms/processorRequirements',
+            'engines':
+                'https://codemeta.github.io/terms/processorRequirements',
+            'author': 'https://codemeta.github.io/terms/creator',
+            'author.email': 'https://codemeta.github.io/terms/email',
+            'author.name': 'https://codemeta.github.io/terms/name',
+            'contributor': 'https://codemeta.github.io/terms/contributor',
+            'keywords': 'https://codemeta.github.io/terms/keywords',
+            'license': 'https://codemeta.github.io/terms/license',
+            'version': 'https://codemeta.github.io/terms/version',
+            'description': 'https://codemeta.github.io/terms/description',
+            'name': 'https://codemeta.github.io/terms/name',
+            'bugs': 'https://codemeta.github.io/terms/issueTracker',
+            'homepage': 'https://codemeta.github.io/terms/url'
         })
 
     def test_compute_metadata_none(self):
         """
         Translating empty content must yield None.
         """
         # given: an empty file
         empty_content = b""
 
         # when
         translated = MAPPINGS["NpmMapping"].translate(empty_content)
 
         # then: None means no metadata was found or an error occurred
         self.assertEqual(None, translated)
 
     def test_compute_metadata_npm(self):
         """
         testing only computation of metadata with hard_mapping_npm
         """
         # given
         content = b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.2",
                 "description": "Simple package.json test for indexer",
                   "repository": {
                     "type": "git",
                     "url": "https://github.com/moranegg/metadata_test"
                 }
             }
         """
+        # The "repository" object is expected as one '<type>+<url>'
+        # string, the form NpmMapping.normalize_repository produces.
         declared_metadata = {
-            'name': 'test_metadata',
-            'version': '0.0.2',
-            'description': 'Simple package.json test for indexer',
-            'codeRepository': {
-                'type': 'git',
-                'url': 'https://github.com/moranegg/metadata_test'
-              },
-            'other': {}
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            'codemeta:name': 'test_metadata',
+            'codemeta:version': '0.0.2',
+            'codemeta:description': 'Simple package.json test for indexer',
+            'codemeta:codeRepository':
+                'git+https://github.com/moranegg/metadata_test',
         }
 
         # when
         result = MAPPINGS["NpmMapping"].translate(content)
         # then
         self.assertEqual(declared_metadata, result)
 
     def test_extract_minimal_metadata_dict(self):
         """
         Test the creation of a coherent minimal metadata set
         """
         # given
         metadata_list = [{
-            'name': 'test_1',
-            'version': '0.0.2',
-            'description': 'Simple package.json test for indexer',
-            'codeRepository': {
-                'type': 'git',
-                'url': 'https://github.com/moranegg/metadata_test'
-              },
-            'other': {}
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            'codemeta:name': 'test_1',
+            'codemeta:version': '0.0.2',
+            'codemeta:description': 'Simple package.json test for indexer',
+            'codemeta:codeRepository':
+                'git+https://github.com/moranegg/metadata_test',
         }, {
-            'name': 'test_0_1',
-            'version': '0.0.2',
-            'description': 'Simple package.json test for indexer',
-            'codeRepository': {
-                'type': 'git',
-                'url': 'https://github.com/moranegg/metadata_test'
-              },
-            'other': {}
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            'codemeta:name': 'test_0_1',
+            'codemeta:version': '0.0.2',
+            'codemeta:description': 'Simple package.json test for indexer',
+            'codemeta:codeRepository':
+                'git+https://github.com/moranegg/metadata_test'
         }, {
-            'name': 'test_metadata',
-            'version': '0.0.2',
-            'author': 'moranegg',
-            'other': {}
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            'codemeta:name': 'test_metadata',
+            'codemeta:version': '0.0.2',
+            'codemeta:author': 'moranegg',
         }]
 
         # when
         results = extract_minimal_metadata_dict(metadata_list)
 
         # then
+        # Expectation: a term with a single distinct value across the
+        # inputs is kept as that value; 'codemeta:name' differs in all
+        # three inputs and is kept as a list.
         expected_results = {
-            "developmentStatus": None,
-            "version": ['0.0.2'],
-            "operatingSystem": None,
-            "description": ['Simple package.json test for indexer'],
-            "keywords": None,
-            "issueTracker": None,
-            "name": ['test_1', 'test_0_1', 'test_metadata'],
-            "author": ['moranegg'],
-            "relatedLink": None,
-            "url": None,
-            "license": None,
-            "maintainer": None,
-            "email": None,
-            "softwareRequirements": None,
-            "identifier": None,
-            "codeRepository": [{
-                'type': 'git',
-                'url': 'https://github.com/moranegg/metadata_test'
-              }]
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            "codemeta:version": '0.0.2',
+            "codemeta:description": 'Simple package.json test for indexer',
+            "codemeta:name": ['test_1', 'test_0_1', 'test_metadata'],
+            "codemeta:author": 'moranegg',
+            "codemeta:codeRepository":
+                'git+https://github.com/moranegg/metadata_test',
         }
         self.assertEqual(expected_results, results)
 
     def test_index_content_metadata_npm(self):
         """
         testing NPM with package.json
         - one sha1 uses a file that can't be translated to metadata and
           should return None in the translated metadata
         """
         # given
         sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5',
                  'd4c647f0fc257591cc9ba1722484229780d1c607',
                  '02fb2c89e14f7fab46701478c83779c7beb7b069']
         # this metadata indexer computes only metadata for package.json
         # in npm context with a hard mapping
         metadata_indexer = ContentMetadataTestIndexer(
             tool=self.content_tool, config={})
 
         # when
         metadata_indexer.run(sha1s, policy_update='ignore-dups')
         results = metadata_indexer.idx_storage.added_data
 
+        # indexer_configuration_id 30 is the id MockIndexerStorage returns
+        # for the 'swh-metadata-translator' tool.  The last sha1 points
+        # to Python source in MockObjStorage, hence its None translation.
         expected_results = [('content_metadata', False, [{
             'indexer_configuration_id': 30,
             'translated_metadata': {
-                'other': {},
-                'codeRepository': {
-                    'type': 'git',
-                    'url': 'https://github.com/moranegg/metadata_test'
-                },
-                'description': 'Simple package.json test for indexer',
-                'name': 'test_metadata',
-                'version': '0.0.1'
+                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+                'codemeta:codeRepository':
+                    'git+https://github.com/moranegg/metadata_test',
+                'codemeta:description': 'Simple package.json test for indexer',
+                'codemeta:name': 'test_metadata',
+                'codemeta:version': '0.0.1'
             },
             'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'
             }, {
             'indexer_configuration_id': 30,
             'translated_metadata': {
-                'softwareRequirements': {
-                        'JSONStream': '~1.3.1',
-                        'abbrev': '~1.1.0',
-                        'ansi-regex': '~2.1.1',
-                        'ansicolors': '~0.3.2',
-                        'ansistyles': '~0.1.3'
-                },
-                'issueTracker': {
-                    'url': 'https://github.com/npm/npm/issues'
-                },
-                'creator':
+                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+                'codemeta:issueTracker':
+                    'https://github.com/npm/npm/issues',
+                'codemeta:creator':
                     'Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)',
-                'codeRepository': {
-                    'type': 'git',
-                    'url': 'https://github.com/npm/npm'
-                },
-                'description': 'a package manager for JavaScript',
-                'softwareSuggestions': {
-                        'tacks': '~1.2.6',
-                        'tap': '~10.3.2'
-                },
-                'license': 'Artistic-2.0',
-                'version': '5.0.3',
-                'other': {
-                    'preferGlobal': True,
-                    'config': {
-                        'publishtest': False
-                    }
-                },
-                'name': 'npm',
-                'keywords': [
+                'codemeta:codeRepository':
+                    'git+https://github.com/npm/npm',
+                'codemeta:description': 'a package manager for JavaScript',
+                'codemeta:license': 'Artistic-2.0',
+                'codemeta:version': '5.0.3',
+                'codemeta:name': 'npm',
+                'codemeta:keywords': [
                     'install',
                     'modules',
                     'package manager',
                     'package.json'
                 ],
-                'url': 'https://docs.npmjs.com/'
+                'codemeta:url': 'https://docs.npmjs.com/'
             },
             'id': 'd4c647f0fc257591cc9ba1722484229780d1c607'
             }, {
             'indexer_configuration_id': 30,
             'translated_metadata': None,
             'id': '02fb2c89e14f7fab46701478c83779c7beb7b069'
         }])]
 
         # The assertion below returns False sometimes because of nested lists
         self.assertEqual(expected_results, results)
 
     def test_detect_metadata_package_json(self):
         """
         Of two directory entries, only package.json is reported, under
         the NpmMapping key and identified by its sha1.
         """
         # given
         index_js_entry = {
             'sha1_git': b'abc',
             'name': b'index.js',
             'target': b'abc',
             'length': 897,
             'status': 'visible',
             'type': 'file',
             'perms': 33188,
             'dir_id': b'dir_a',
             'sha1': b'bcd',
         }
         package_json_entry = {
             'sha1_git': b'aab',
             'name': b'package.json',
             'target': b'aab',
             'length': 712,
             'status': 'visible',
             'type': 'file',
             'perms': 33188,
             'dir_id': b'dir_a',
             'sha1': b'cde',
         }
 
         # when
         detected = detect_metadata([index_js_entry, package_json_entry])
 
         # then
         self.assertEqual({'NpmMapping': [b'cde']}, detected)
 
     def test_revision_metadata_indexer(self):
         metadata_indexer = RevisionMetadataTestIndexer()
 
         sha1_gits = [
             b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
         ]
         metadata_indexer.run(sha1_gits, 'update-dups')
 
         results = metadata_indexer.idx_storage.added_data
 
+        # indexer_configuration_id 7 is the id MockIndexerStorage returns
+        # for the 'swh-metadata-detector' tool.
         expected_results = [('revision_metadata', True, [{
             'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'translated_metadata': {
-                'identifier': None,
-                'maintainer': None,
-                'url': [
-                    'https://github.com/librariesio/yarn-parser#readme'
-                ],
-                'codeRepository': [{
-                    'type': 'git',
-                    'url': 'git+https://github.com/librariesio/yarn-parser.git'
-                }],
-                'author': ['Andrew Nesbitt'],
-                'license': ['AGPL-3.0'],
-                'version': ['1.0.0'],
-                'description': [
-                    'Tiny web service for parsing yarn.lock files'
-                ],
-                'relatedLink': None,
-                'developmentStatus': None,
-                'operatingSystem': None,
-                'issueTracker': [{
-                    'url': 'https://github.com/librariesio/yarn-parser/issues'
-                }],
-                'softwareRequirements': [{
-                    'express': '^4.14.0',
-                    'yarn': '^0.21.0',
-                    'body-parser': '^1.15.2'
-                }],
-                'name': ['yarn-parser'],
-                'keywords': [['yarn', 'parse', 'lock', 'dependencies']],
-                'email': None
+                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+                'codemeta:url':
+                    'https://github.com/librariesio/yarn-parser#readme',
+                'codemeta:codeRepository':
+                    'git+https://github.com/librariesio/yarn-parser.git',
+                'codemeta:author': 'Andrew Nesbitt',
+                'codemeta:license': 'AGPL-3.0',
+                'codemeta:version': '1.0.0',
+                'codemeta:description':
+                    'Tiny web service for parsing yarn.lock files',
+                'codemeta:issueTracker':
+                    'https://github.com/librariesio/yarn-parser/issues',
+                'codemeta:name': 'yarn-parser',
+                'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'],
             },
             'indexer_configuration_id': 7
         }])]
         # then
         self.assertEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
index 911d92c..b5401d1 100644
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -1,142 +1,127 @@
 # Copyright (C) 2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import time
 import logging
 import unittest
 from celery import task
 
 from swh.indexer.metadata import OriginMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
 from swh.indexer.tests.test_utils import MockIndexerStorage
 from swh.indexer.tests.test_origin_head import OriginHeadTestIndexer
 from swh.indexer.tests.test_metadata import RevisionMetadataTestIndexer
 
 from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture
 
 
 class OriginMetadataTestIndexer(OriginMetadataIndexer):
     """Origin metadata indexer wired to in-memory mocks for the tests."""
 
     def prepare(self):
         """Set a fixed configuration, install the mocks, register tools."""
         storage_config = {
             'cls': 'remote',
             'args': {
                 'url': 'http://localhost:9999',
             },
         }
         tool_config = {
             'name': 'origin-metadata',
             'version': '0.0.1',
             'configuration': {},
         }
         self.config = {'storage': storage_config, 'tools': tool_config}
 
         self.results = []
         self.storage = MockStorage()
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         registered_tools = self.register_tools(self.config['tools'])
         self.tools = registered_tools
         self.tool = registered_tools[0]
 
 
 @task
 def revision_metadata_test_task(*args, **kwargs):
     """Run a RevisionMetadataTestIndexer with the given arguments and
     return the indexer's results."""
     test_indexer = RevisionMetadataTestIndexer()
     test_indexer.run(*args, **kwargs)
     return test_indexer.results
 
 
 @task
 def origin_intrinsic_metadata_test_task(*args, **kwargs):
     """Run an OriginMetadataTestIndexer with the given arguments and
     return the indexer's results."""
     test_indexer = OriginMetadataTestIndexer()
     test_indexer.run(*args, **kwargs)
     return test_indexer.results
 
 
 class OriginHeadTestIndexer(OriginHeadTestIndexer):
     """Variant of the imported OriginHeadTestIndexer (whose name it
     shadows in this module) that points at the test tasks."""
     origin_intrinsic_metadata_task = 'origin_intrinsic_metadata_test_task'
     revision_metadata_task = 'revision_metadata_test_task'
 
 
 class TestOriginMetadata(SchedulerTestFixture, unittest.TestCase):
     def setUp(self):
         """Reset the mock storage log, register both test task types
         with the scheduler, and share the scheduler with the revision
         indexer class."""
         super().setUp()
         self.maxDiff = None
         MockIndexerStorage.added_data = []
         module_prefix = 'swh.indexer.tests.test_origin_metadata.'
         for task_name in ('revision_metadata_test_task',
                           'origin_intrinsic_metadata_test_task'):
             self.add_scheduler_task_type(
                 task_name, module_prefix + task_name)
         RevisionMetadataTestIndexer.scheduler = self.scheduler
 
     def tearDown(self):
         """Remove the scheduler attribute set on the revision indexer
         class in setUp, then run the parent teardown."""
         delattr(RevisionMetadataTestIndexer, 'scheduler')
         super().tearDown()
 
     def test_pipeline(self):
         indexer = OriginHeadTestIndexer()
         indexer.scheduler = self.scheduler
         indexer.run(
                 ["git+https://github.com/librariesio/yarn-parser"],
                 policy_update='update-dups',
                 parse_ids=True)
 
         self.run_ready_tasks()  # Run the first task
         time.sleep(0.1)  # Give it time to complete and schedule the 2nd one
         self.run_ready_tasks()  # Run the second task
 
+        # Both the revision-level and the origin-level result below are
+        # expected to carry this same translated metadata.
         metadata = {
-            'identifier': None,
-            'maintainer': None,
-            'url': [
-                'https://github.com/librariesio/yarn-parser#readme'
-            ],
-            'codeRepository': [{
-                'type': 'git',
-                'url': 'git+https://github.com/librariesio/yarn-parser.git'
-            }],
-            'author': ['Andrew Nesbitt'],
-            'license': ['AGPL-3.0'],
-            'version': ['1.0.0'],
-            'description': [
-                'Tiny web service for parsing yarn.lock files'
-            ],
-            'relatedLink': None,
-            'developmentStatus': None,
-            'operatingSystem': None,
-            'issueTracker': [{
-                'url': 'https://github.com/librariesio/yarn-parser/issues'
-            }],
-            'softwareRequirements': [{
-                'express': '^4.14.0',
-                'yarn': '^0.21.0',
-                'body-parser': '^1.15.2'
-            }],
-            'name': ['yarn-parser'],
-            'keywords': [['yarn', 'parse', 'lock', 'dependencies']],
-            'email': None
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            'codemeta:url':
+                'https://github.com/librariesio/yarn-parser#readme',
+            'codemeta:codeRepository':
+                'git+https://github.com/librariesio/yarn-parser.git',
+            'codemeta:author': 'Andrew Nesbitt',
+            'codemeta:license': 'AGPL-3.0',
+            'codemeta:version': '1.0.0',
+            'codemeta:description':
+                'Tiny web service for parsing yarn.lock files',
+            'codemeta:issueTracker':
+                'https://github.com/librariesio/yarn-parser/issues',
+            'codemeta:name': 'yarn-parser',
+            'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'],
         }
         rev_metadata = {
             'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'translated_metadata': metadata,
             'indexer_configuration_id': 7,
         }
         origin_metadata = {
             'origin_id': 54974445,
             'from_revision': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'metadata': metadata,
             'indexer_configuration_id': 7,
         }
         expected_results = [
                 ('origin_intrinsic_metadata', True, [origin_metadata]),
                 ('revision_metadata', True, [rev_metadata])]
 
         results = list(indexer.idx_storage.added_data)
         self.assertCountEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
index 826a909..3be03f7 100644
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -1,410 +1,399 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 from swh.objstorage.exc import ObjNotFoundError
 
 # Known origins used by the mocks.  Only the id, type and url vary;
 # 'lister' and 'project' are None for every entry.
 ORIGINS = [
     {
         'id': origin_id,
         'lister': None,
         'project': None,
         'type': origin_type,
         'url': origin_url,
     }
     for (origin_id, origin_type, origin_url) in (
         (52189575, 'git',
          'https://github.com/SoftwareHeritage/swh-storage'),
         (4423668, 'ftp',
          'rsync://ftp.gnu.org/gnu/3dldf'),
         (77775770, 'deposit',
          'https://forge.softwareheritage.org/source/jesuisgpl/'),
         (85072327, 'pypi',
          'https://pypi.org/project/limnoria/'),
         (49908349, 'svn',
          'http://0-512-md.googlecode.com/svn/'),
         (54974445, 'git',
          'https://github.com/librariesio/yarn-parser'),
     )
 ]
 
 # Snapshot fixtures keyed by origin id (see ORIGINS).  Branch names and
 # targets are bytes; the keys inside each branch dict ('target',
 # 'target_type') are str.
 SNAPSHOTS = {
         52189575: {
             'branches': {
                 b'refs/heads/add-revision-origin-cache': {
                     'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0'
                               b's\xe7/\xe9l\x1e',
                     'target_type': 'revision'},
                 b'HEAD': {
                     'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}'
                               b'\xac\xefrm',
                     'target_type': 'revision'},
                 b'refs/tags/v0.0.103': {
                     'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+'
                               b'\x0f\xdd',
                     'target_type': 'release'},
                 }},
         4423668: {
             'branches': {
                 b'3DLDF-1.1.4.tar.gz': {
                     'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc'
                               b'"G\x99\x11',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.2.tar.gz': {
                     'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e='
                               b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.3-examples.tar.gz': {
                     'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97'
                               b'\xfe\xadZ\x80\x80\xc1\x83\xff',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.3.tar.gz': {
                     'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
                               b'\xcc\x1a\xb4`\x8c\x8by',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.tar.gz': {
                     'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
                               b'\xd3\xd1m',
                     # str key like every other branch (was the bytes key
                     # b'target_type', which a 'target_type' lookup misses)
                     'target_type': 'revision'}
                 }},
         77775770: {
             'branches': {
                 b'master': {
                     'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
                               b'\xa6\xe9\x99\xb1\x9e]q\xeb',
                     'target_type': 'revision'}
             },
             'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV"
                   b"\x1d\r "},
         85072327: {
             'branches': {
                 b'HEAD': {
                     'target': b'releases/2018.09.09',
                     'target_type': 'alias'},
                 b'releases/2018.09.01': {
                     'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d'
                               b'\xbb\xdfF\xfdw\xcf',
                     'target_type': 'revision'},
                 b'releases/2018.09.09': {
                     'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
                               b'A\x10\x9d\xc5\xfa2\xf8t',
                     'target_type': 'revision'}},
             'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay'
                   b'\x12\x9e\xd6\xb3'},
         49908349: {
                 'branches': {
                     b'master': {
                         'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
                                   b'\xc9\xad#.\x1bw=\x18',
                         'target_type': 'revision'}},
                 'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7'
                       b'\x05\xea\xb8\x1f\xc4H\xf4s'},
         54974445: {
                 'branches': {
                     b'HEAD': {
                         'target': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
                         'target_type': 'revision'}}}
         }
 
 
 class MockObjStorage:
     """In-memory stand-in for an swh-objstorage, preloaded with a fixed
     set of contents keyed by sha1 hex string.
 
     """
     data = {}
 
     def __init__(self):
         """Load the predefined contents into this instance."""
         self.data = {
             '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text',
             '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
             '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
             '02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
             import unittest
             import logging
             from swh.indexer.mimetype import ContentMimetypeIndexer
             from swh.indexer.tests.test_utils import MockObjStorage
 
             class MockStorage():
                 def content_mimetype_add(self, mimetypes):
                     self.state = mimetypes
                     self.conflict_update = conflict_update
 
                 def indexer_configuration_add(self, tools):
                     return [{
                         'id': 10,
                     }]
             """,
             '103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
                 #ifndef __AVL__
                 #define __AVL__
 
                 typedef struct _avl_tree avl_tree;
 
                 typedef struct _data_t {
                   int content;
                 } data_t;
             """,
             '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
             (should 'pygments (recognize 'lisp 'easily))
 
             """,
             '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.1",
                 "description": "Simple package.json test for indexer",
                 "repository": {
                   "type": "git",
                   "url": "https://github.com/moranegg/metadata_test"
               }
             }
             """,
             'd4c647f0fc257591cc9ba1722484229780d1c607': b"""
             {
               "version": "5.0.3",
               "name": "npm",
               "description": "a package manager for JavaScript",
               "keywords": [
                 "install",
                 "modules",
                 "package manager",
                 "package.json"
               ],
               "preferGlobal": true,
               "config": {
                 "publishtest": false
               },
               "homepage": "https://docs.npmjs.com/",
               "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
               "repository": {
                 "type": "git",
                 "url": "https://github.com/npm/npm"
               },
               "bugs": {
                 "url": "https://github.com/npm/npm/issues"
               },
               "dependencies": {
                 "JSONStream": "~1.3.1",
                 "abbrev": "~1.1.0",
                 "ansi-regex": "~2.1.1",
                 "ansicolors": "~0.3.2",
                 "ansistyles": "~0.1.3"
               },
               "devDependencies": {
                 "tacks": "~1.2.6",
                 "tap": "~10.3.2"
               },
               "license": "Artistic-2.0"
             }
 
             """,
             'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b"""
             """,
             'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'',
         }
 
     def __iter__(self):
         """Iterate over the stored sha1 keys."""
         yield from self.data
 
     def __contains__(self, sha1):
         """Tell whether a (non-None) content is stored under sha1."""
         stored = self.data.get(sha1)
         return stored is not None
 
     def get(self, sha1):
         """Return the content stored under sha1.
 
         Raises:
             ObjNotFoundError: if nothing is stored under sha1
 
         """
         if sha1 not in self:
             raise ObjNotFoundError(sha1)
         return self.data[sha1]
 
 
 class MockIndexerStorage():
     """Mock an swh-indexer storage.
 
     """
     added_data = []
 
     def indexer_configuration_add(self, tools):
         tool = tools[0]
         if tool['tool_name'] == 'swh-metadata-translator':
             return [{
                 'id': 30,
                 'tool_name': 'swh-metadata-translator',
                 'tool_version': '0.0.1',
                 'tool_configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                 },
             }]
         elif tool['tool_name'] == 'swh-metadata-detector':
             return [{
                 'id': 7,
                 'tool_name': 'swh-metadata-detector',
                 'tool_version': '0.0.1',
                 'tool_configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                 },
             }]
         elif tool['tool_name'] == 'origin-metadata':
             return [{
                 'id': 8,
                 'tool_name': 'origin-metadata',
                 'tool_version': '0.0.1',
                 'tool_configuration': {},
             }]
         else:
             assert False, 'Unknown tool {tool_name}'.format(**tool)
 
     def content_metadata_missing(self, sha1s):
         yield from []
 
     def content_metadata_add(self, metadata, conflict_update=None):
         self.added_data.append(
                 ('content_metadata', conflict_update, metadata))
 
     def revision_metadata_add(self, metadata, conflict_update=None):
         self.added_data.append(
                 ('revision_metadata', conflict_update, metadata))
 
     def origin_intrinsic_metadata_add(self, metadata, conflict_update=None):
         self.added_data.append(
                 ('origin_intrinsic_metadata', conflict_update, metadata))
 
     def content_metadata_get(self, sha1s):
         return [{
             'tool': {
                 'configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                     },
                 'version': '0.0.1',
                 'id': 6,
                 'name': 'swh-metadata-translator'
             },
             'id': b'cde',
             'translated_metadata': {
-                'issueTracker': {
-                    'url': 'https://github.com/librariesio/yarn-parser/issues'
-                },
-                'version': '1.0.0',
-                'name': 'yarn-parser',
-                'author': 'Andrew Nesbitt',
-                'url': 'https://github.com/librariesio/yarn-parser#readme',
-                'processorRequirements': {'node': '7.5'},
-                'other': {
-                    'scripts': {
-                                    'start': 'node index.js'
-                    },
-                    'main': 'index.js'
-                },
-                'license': 'AGPL-3.0',
-                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
-                'codeRepository': {
-                    'type': 'git',
-                    'url': 'git+https://github.com/librariesio/yarn-parser.git'
-                },
-                'description': 'Tiny web service for parsing yarn.lock files',
-                'softwareRequirements': {
-                    'yarn': '^0.21.0',
-                    'express': '^4.14.0',
-                    'body-parser': '^1.15.2'}
+                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+                'codemeta:issueTracker':
+                    'https://github.com/librariesio/yarn-parser/issues',
+                'codemeta:version': '1.0.0',
+                'codemeta:name': 'yarn-parser',
+                'codemeta:author': 'Andrew Nesbitt',
+                'codemeta:url':
+                    'https://github.com/librariesio/yarn-parser#readme',
+                'codemeta:processorRequirements': {'node': '7.5'},
+                'codemeta:license': 'AGPL-3.0',
+                'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'],
+                'codemeta:codeRepository':
+                    'git+https://github.com/librariesio/yarn-parser.git',
+                'codemeta:description':
+                    'Tiny web service for parsing yarn.lock files',
                 }
         }]
 
 
 class MockStorage():
     """Mock a real swh-storage storage to simplify reading indexers'
     outputs.
 
     """
     def origin_get(self, id_):
         for origin in ORIGINS:
             for (k, v) in id_.items():
                 if origin[k] != v:
                     break
             else:
-                # This block is run if and only if we didn't break,
-                # ie. if all supplied parts of the id are set to the
-                # expected value.
+                # This block is run iff we didn't break, i.e. if all supplied
+                # parts of the id are set to the expected value.
                 return origin
         assert False, id_
 
     def snapshot_get_latest(self, origin_id):
         if origin_id in SNAPSHOTS:
             return SNAPSHOTS[origin_id]
         else:
             assert False, origin_id
 
     def revision_get(self, revisions):
         return [{
             'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'committer': {
                 'id': 26,
                 'name': b'Andrew Nesbitt',
                 'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>',
                 'email': b'andrewnez@gmail.com'
             },
             'synthetic': False,
             'date': {
                 'negative_utc': False,
                 'timestamp': {
                     'seconds': 1487596456,
                     'microseconds': 0
                 },
                 'offset': 0
             },
             'directory': b'10'
         }]
 
     def directory_ls(self, directory, recursive=False, cur=None):
         # with directory: b'\x9d',
         return [{
                 'sha1_git': b'abc',
                 'name': b'index.js',
                 'target': b'abc',
                 'length': 897,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'10',
                 'sha1': b'bcd'
                 },
                 {
                 'sha1_git': b'aab',
                 'name': b'package.json',
                 'target': b'aab',
                 'length': 712,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'10',
                 'sha1': b'cde'
                 },
                 {
                 'dir_id': b'10',
                 'target': b'11',
                 'type': 'dir',
                 'length': None,
                 'name': b'.github',
                 'sha1': None,
                 'perms': 16384,
                 'sha1_git': None,
                 'status': None,
                 'sha256': None
                 }]