diff --git a/requirements.txt b/requirements.txt
index 87ecc1f..3a7428c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 vcversioner
 pygments
 click
 chardet
 file_magic
 pyld
+xmltodict
diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
index e06744b..4548029 100644
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -1,99 +1,120 @@
 # Copyright (C) 2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import csv
 import json
 import os.path
 
 import swh.indexer
 from pyld import jsonld
 
 _DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data')
 
 CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv')
 
 CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, 'codemeta', 'codemeta.jsonld')
 
 
 with open(CODEMETA_CONTEXT_PATH) as fd:
     CODEMETA_CONTEXT = json.load(fd)
 
 CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0'
 CODEMETA_URI = 'https://codemeta.github.io/terms/'
 SCHEMA_URI = 'http://schema.org/'
 
 
 PROPERTY_BLACKLIST = {
     # CodeMeta properties that we cannot properly represent.
-    CODEMETA_URI + 'softwareRequirements',
+    SCHEMA_URI + 'softwareRequirements',
     CODEMETA_URI + 'softwareSuggestions',
 
     # Duplicate of 'author'
-    CODEMETA_URI + 'creator',
+    SCHEMA_URI + 'creator',
     }
 
 
+def make_absolute_uri(local_name):
+    """Return the absolute URI of a term of the local CodeMeta context.
+
+    Raises KeyError for unknown terms, ValueError for unsupported ones.
+    """
+    definition = CODEMETA_CONTEXT['@context'][local_name]
+    if isinstance(definition, str):
+        return definition
+    if not isinstance(definition, dict):
+        raise ValueError('unsupported definition: %r' % (definition,))
+    (prefix, name) = definition['@id'].split(':', 1)
+    # Only these two vocabularies are handled as prefixes of an '@id'.
+    base_uris = {'schema': SCHEMA_URI, 'codemeta': CODEMETA_URI}
+    if prefix not in base_uris:
+        raise ValueError('unsupported prefix: %r' % (prefix,))
+    return base_uris[prefix] + name
+
+
 def _read_crosstable(fd):
     reader = csv.reader(fd)
     try:
         header = next(reader)
     except StopIteration:
         raise ValueError('empty file')
 
     data_sources = set(header) - {'Parent Type', 'Property',
                                   'Type', 'Description'}
     assert 'codemeta-V1' in data_sources
 
     codemeta_translation = {data_source: {} for data_source in data_sources}
 
     for line in reader:  # For each canonical name
-        canonical_name = CODEMETA_URI + dict(zip(header, line))['Property']
+        local_name = dict(zip(header, line))['Property']
+        if not local_name:
+            continue
+        canonical_name = make_absolute_uri(local_name)
         if canonical_name in PROPERTY_BLACKLIST:
             continue
         for (col, value) in zip(header, line):  # For each cell in the row
             if col in data_sources:
                 # If that's not the parentType/property/type/description
                 for local_name in value.split('/'):
                     # For each of the data source's properties that maps
                     # to this canonical name
                     if local_name.strip():
                         codemeta_translation[col][local_name.strip()] = \
                                 canonical_name
 
     return codemeta_translation
 
 
 with open(CROSSWALK_TABLE_PATH) as fd:
     CROSSWALK_TABLE = _read_crosstable(fd)
 
 
 def _document_loader(url):
     """Document loader for pyld.
 
     Reads the local codemeta.jsonld file instead of fetching it
     from the Internet every single time."""
     if url == CODEMETA_CONTEXT_URL:
         return {
                 'contextUrl': None,
                 'documentUrl': url,
                 'document': CODEMETA_CONTEXT,
                 }
     elif url == CODEMETA_URI:
         raise Exception('{} is CodeMeta\'s URI, use {} as context url'.format(
             CODEMETA_URI, CODEMETA_CONTEXT_URL))
     else:
         raise Exception(url)
 
 
 def compact(doc):
     """Same as `pyld.jsonld.compact`, but in the context of CodeMeta."""
     return jsonld.compact(doc, CODEMETA_CONTEXT_URL,
                           options={'documentLoader': _document_loader})
 
 
 def expand(doc):
     """Same as `pyld.jsonld.expand`, but in the context of CodeMeta."""
     return jsonld.expand(doc,
                          options={'documentLoader': _document_loader})
diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py
index 00bef4a..629974a 100644
--- a/swh/indexer/metadata_detector.py
+++ b/swh/indexer/metadata_detector.py
@@ -1,58 +1,60 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from swh.indexer.codemeta import compact, expand, CODEMETA_URI
+from swh.indexer.codemeta import compact, expand
+from swh.indexer.codemeta import make_absolute_uri
 from swh.indexer.metadata_dictionary import MAPPINGS
 
 
 def detect_metadata(files):
     """
     Detects files potentially containing metadata
     Args:
         - file_entries (list): list of files
 
     Returns:
         - empty list if nothing was found
         - dictionary {mapping_filenames[name]:f['sha1']}
     """
     results = {}
     for (mapping_name, mapping) in MAPPINGS.items():
         matches = mapping.detect_metadata_files(files)
         if matches:
             results[mapping_name] = matches
     return results
 
 
 _MINIMAL_PROPERTY_SET = {
     "developmentStatus", "version", "operatingSystem", "description",
     "keywords", "issueTracker", "name", "author", "relatedLink",
     "url", "license", "maintainer", "email", "identifier",
     "codeRepository"}
 
-MINIMAL_METADATA_SET = {CODEMETA_URI+prop for prop in _MINIMAL_PROPERTY_SET}
+MINIMAL_METADATA_SET = {make_absolute_uri(prop)
+                        for prop in _MINIMAL_PROPERTY_SET}
 
 
 def extract_minimal_metadata_dict(metadata_list):
     """
     Every item in the metadata_list is a dict of translated_metadata in the
     CodeMeta vocabulary
     we wish to extract a minimal set of terms and keep all values corresponding
     to this term without duplication
     Args:
         - metadata_list (list): list of dicts of translated_metadata
 
     Returns:
         - minimal_dict (dict): one dict with selected values of metadata
     """
     minimal_dict = {}
     for document in metadata_list:
         for metadata_item in expand(document):
             for (term, value) in metadata_item.items():
                 if term in MINIMAL_METADATA_SET:
                     if term not in minimal_dict:
                         minimal_dict[term] = [value]
                     elif value not in minimal_dict[term]:
                         minimal_dict[term].append(value)
     return compact(minimal_dict)
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
index c2cd7eb..b8e01b9 100644
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -1,230 +1,284 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import os
 import re
 import abc
 import json
 import logging
+import xmltodict
 
-from swh.indexer.codemeta import CROSSWALK_TABLE, CODEMETA_URI, compact
+from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import compact, expand
 
 
 MAPPINGS = {}
 
 
 def register_mapping(cls):
     MAPPINGS[cls.__name__] = cls()
     return cls
 
 
 class BaseMapping(metaclass=abc.ABCMeta):
     """Base class for mappings to inherit from
 
     To implement a new mapping:
 
     - inherit this class
     - override translate function
     """
     def __init__(self):
         self.log = logging.getLogger('%s.%s' % (
             self.__class__.__module__,
             self.__class__.__name__))
 
     @abc.abstractmethod
     def detect_metadata_files(self, files):
         """
         Detects files potentially containing metadata
         Args:
             - file_entries (list): list of files
 
         Returns:
             - empty list if nothing was found
             - list of sha1 otherwise
         """
         pass
 
     @abc.abstractmethod
     def translate(self, file_content):
         pass
 
     def normalize_translation(self, metadata):
         return compact(metadata)
 
 
+class SingleFileMapping(BaseMapping):
+    """Base class for all mappings that use a single file as input."""
+
+    @property
+    @abc.abstractmethod
+    def filename(self):
+        """The name of the file to extract metadata from (e.g. b'pom.xml')."""
+        pass
+
+    def detect_metadata_files(self, file_entries):
+        for entry in file_entries:  # only the first match is reported
+            if entry['name'] == self.filename:
+                return [entry['sha1']]
+        return []  # no entry is named like `filename`
+
+
 class DictMapping(BaseMapping):
     """Base class for mappings that take as input a file that is mostly
     a key-value store (eg. a shallow JSON dict)."""
 
     @property
     @abc.abstractmethod
     def mapping(self):
         """A translation dict to map dict keys into a canonical name."""
         pass
 
-    def translate_dict(self, content_dict):
+    def translate_dict(self, content_dict, *, normalize=True):
         """
         Translates content  by parsing content from a dict object
         and translating with the appropriate mapping
 
         Args:
             content_dict (dict)
 
         Returns:
             dict: translated metadata in json-friendly form needed for
                   the indexer
 
         """
-        translated_metadata = {}
+        translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'}
         for k, v in content_dict.items():
             # First, check if there is a specific translation
             # method for this key
             translation_method = getattr(self, 'translate_' + k, None)
             if translation_method:
                 translation_method(translated_metadata, v)
             elif k in self.mapping:
                 # if there is no method, but the key is known from the
                 # crosswalk table
 
                 # if there is a normalization method, use it on the value
                 normalization_method = getattr(self, 'normalize_' + k, None)
                 if normalization_method:
                     v = normalization_method(v)
 
                 # set the translation metadata with the normalized value
                 translated_metadata[self.mapping[k]] = v
-        return self.normalize_translation(translated_metadata)
+        if normalize:
+            return self.normalize_translation(translated_metadata)
+        else:
+            return translated_metadata
 
 
-class JsonMapping(DictMapping):
+class JsonMapping(DictMapping, SingleFileMapping):
     """Base class for all mappings that use a JSON file as input."""
 
-    @property
-    @abc.abstractmethod
-    def filename(self):
-        """The .json file to extract metadata from."""
-        pass
-
-    def detect_metadata_files(self, file_entries):
-        for entry in file_entries:
-            if entry['name'] == self.filename:
-                return [entry['sha1']]
-        return []
-
     def translate(self, raw_content):
         """
         Translates content by parsing content from a bytestring containing
         json data and translating with the appropriate mapping
 
         Args:
             raw_content: bytes
 
         Returns:
             dict: translated metadata in json-friendly form needed for
                   the indexer
 
         """
         try:
             raw_content = raw_content.decode()
         except UnicodeDecodeError:
             self.log.warning('Error unidecoding %r', raw_content)
             return
         try:
             content_dict = json.loads(raw_content)
         except json.JSONDecodeError:
             self.log.warning('Error unjsoning %r' % raw_content)
             return
         return self.translate_dict(content_dict)
 
 
 @register_mapping
 class NpmMapping(JsonMapping):
     """
     dedicated class for NPM (package.json) mapping and translation
     """
     mapping = CROSSWALK_TABLE['NodeJS']
     filename = b'package.json'
 
     _schema_shortcuts = {
             'github': 'https://github.com/',
             'gist': 'https://gist.github.com/',
             'bitbucket': 'https://bitbucket.org/',
             'gitlab': 'https://gitlab.com/',
             }
 
     def normalize_repository(self, d):
         """https://docs.npmjs.com/files/package.json#repository"""
         if isinstance(d, dict):
             return '{type}+{url}'.format(**d)
         elif isinstance(d, str):
             if '://' in d:
                 return d
             elif ':' in d:
                 (schema, rest) = d.split(':', 1)
                 if schema in self._schema_shortcuts:
                     return self._schema_shortcuts[schema] + rest
                 else:
                     return None
             else:
                 return self._schema_shortcuts['github'] + d
 
         else:
             return None
 
     def normalize_bugs(self, d):
         return '{url}'.format(**d)
 
     _parse_author = re.compile(r'^ *'
                                r'(?P<name>.*?)'
                                r'( +<(?P<email>.*)>)?'
                                r'( +\((?P<url>.*)\))?'
                                r' *$')
 
     def normalize_author(self, d):
         'https://docs.npmjs.com/files/package.json' \
                 '#people-fields-author-contributors'
-        author = {'@type': CODEMETA_URI+'Person'}
+        author = {'@type': SCHEMA_URI+'Person'}
         if isinstance(d, dict):
             name = d.get('name', None)
             email = d.get('email', None)
             url = d.get('url', None)
         elif isinstance(d, str):
             match = self._parse_author.match(d)
             name = match.group('name')
             email = match.group('email')
             url = match.group('url')
         else:
             return None
         if name:
-            author[CODEMETA_URI+'name'] = name
+            author[SCHEMA_URI+'name'] = name
         if email:
-            author[CODEMETA_URI+'email'] = email
+            author[SCHEMA_URI+'email'] = email
         if url:
-            author[CODEMETA_URI+'url'] = url
+            author[SCHEMA_URI+'url'] = url
         return author
 
 
 @register_mapping
-class CodemetaMapping(JsonMapping):
+class CodemetaMapping(SingleFileMapping):
     """
     dedicated class for CodeMeta (codemeta.json) mapping and translation
     """
-    mapping = CROSSWALK_TABLE['codemeta-V1']
     filename = b'codemeta.json'
 
+    def translate(self, content):
+        """Parse a codemeta.json bytestring; None if it is not valid JSON."""
+        try:
+            doc = json.loads(content.decode())
+        except (UnicodeDecodeError, json.JSONDecodeError):
+            self.log.warning('Error parsing %r', content)
+            return None
+        return self.normalize_translation(expand(doc))
+
+
+@register_mapping
+class MavenMapping(DictMapping, SingleFileMapping):
+    """Mapping and translation of Maven project descriptors (pom.xml)."""
+    filename = b'pom.xml'
+    mapping = CROSSWALK_TABLE['Java (Maven)']
+    _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'}
+
+    def translate(self, content):
+        from xml.parsers.expat import ExpatError
+        try:
+            d = xmltodict.parse(content)['project']
+        except (ExpatError, KeyError):  # malformed XML or no <project> root
+            self.log.warning('Error parsing pom.xml %r', content)
+            return None
+        metadata = self.translate_dict(d, normalize=False)
+        metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
+        return self.normalize_translation(metadata)
+
+    def parse_repositories(self, d):
+        """https://maven.apache.org/pom.html#Repositories"""
+        default = {'repository': self._default_repository}
+        repos = (d.get('repositories', default) or {}).get('repository') or []
+        repos = repos if isinstance(repos, list) else [repos]
+        return list(filter(None, (self.parse_repository(d, r) for r in repos)))
+
+    def parse_repository(self, d, repo):
+        """Return the URL of the artifact in `repo` (None if unsupported)."""
+        if not repo or repo.get('layout', 'default') != 'default':
+            return None  # TODO ?
+        url = repo.get('url')
+        if url and d.get('groupId'):
+            names = d['groupId'].split('.') + [d.get('artifactId')]
+            url = '/'.join([url.rstrip('/')] + [n for n in names if n])
+        return url
+
 
 def main():
+    """Demo: translate a sample package.json and a sample pom.xml."""
-    raw_content = """{"name": "test_name", "unknown_term": "ut"}"""
-    raw_content1 = b"""{"name": "test_name",
-                        "unknown_term": "ut",
-                        "prerequisites" :"packageXYZ"}"""
+    raw_content = b"""{"name": "test_name", "unknown_term": "ut"}"""
+    raw_content1 = b"""<project><groupId>org.example</groupId>
+        <artifactId>demo</artifactId><name>test_name</name></project>"""
     result = MAPPINGS["NpmMapping"].translate(raw_content)
     result1 = MAPPINGS["MavenMapping"].translate(raw_content1)
 
     print(result)
     print(result1)
 
 
 if __name__ == "__main__":
     main()
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index 7e78f92..657f842 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,332 +1,480 @@
 # Copyright (C) 2017-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 import logging
 
 from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.indexer.metadata import ContentMetadataIndexer
 from swh.indexer.metadata import RevisionMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
 from swh.indexer.tests.test_utils import MockIndexerStorage
 
 
 class ContentMetadataTestIndexer(ContentMetadataIndexer):
     """Specific Metadata whose configuration is enough to satisfy the
        indexing tests.
     """
     def prepare(self):
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         self.tools = self.register_tools(self.config['tools'])
         self.tool = self.tools[0]
         self.results = []
 
 
 class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
     """Specific indexer whose configuration is enough to satisfy the
        indexing tests.
     """
 
     ContentMetadataIndexer = ContentMetadataTestIndexer
 
     def prepare(self):
         self.config = {
             'storage': {
                 'cls': 'remote',
                 'args': {
                     'url': 'http://localhost:9999',
                 }
             },
             'tools': {
                 'name': 'swh-metadata-detector',
                 'version': '0.0.2',
                 'configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                 }
             }
         }
         self.storage = MockStorage()
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         self.tools = self.register_tools(self.config['tools'])
         self.tool = self.tools[0]
         self.results = []
 
 
 class Metadata(unittest.TestCase):
     """
     Tests metadata_mock_tool tool for Metadata detection
     """
     def setUp(self):
         """
         shows the entire diff in the results
         """
         self.maxDiff = None
         self.content_tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.2',
             'configuration': {
                 'type': 'local',
                 'context': 'NpmMapping'
             }
         }
         MockIndexerStorage.added_data = []
 
     def test_crosstable(self):
         self.assertEqual(CROSSWALK_TABLE['NodeJS'], {
-            'repository': 'https://codemeta.github.io/terms/codeRepository',
-            'os': 'https://codemeta.github.io/terms/operatingSystem',
-            'cpu': 'https://codemeta.github.io/terms/processorRequirements',
+            'repository': 'http://schema.org/codeRepository',
+            'os': 'http://schema.org/operatingSystem',
+            'cpu': 'http://schema.org/processorRequirements',
             'engines':
-                'https://codemeta.github.io/terms/processorRequirements',
-            'author': 'https://codemeta.github.io/terms/author',
-            'author.email': 'https://codemeta.github.io/terms/email',
-            'author.name': 'https://codemeta.github.io/terms/name',
-            'contributor': 'https://codemeta.github.io/terms/contributor',
-            'keywords': 'https://codemeta.github.io/terms/keywords',
-            'license': 'https://codemeta.github.io/terms/license',
-            'version': 'https://codemeta.github.io/terms/version',
-            'description': 'https://codemeta.github.io/terms/description',
-            'name': 'https://codemeta.github.io/terms/name',
+                'http://schema.org/processorRequirements',
+            'author': 'http://schema.org/author',
+            'author.email': 'http://schema.org/email',
+            'author.name': 'http://schema.org/name',
+            'contributor': 'http://schema.org/contributor',
+            'keywords': 'http://schema.org/keywords',
+            'license': 'http://schema.org/license',
+            'version': 'http://schema.org/version',
+            'description': 'http://schema.org/description',
+            'name': 'http://schema.org/name',
             'bugs': 'https://codemeta.github.io/terms/issueTracker',
-            'homepage': 'https://codemeta.github.io/terms/url'
+            'homepage': 'http://schema.org/url'
         })
 
     def test_compute_metadata_none(self):
         """
         testing content empty content is empty
         should return None
         """
         # given
         content = b""
 
         # None if no metadata was found or an error occurred
         declared_metadata = None
         # when
         result = MAPPINGS["NpmMapping"].translate(content)
         # then
         self.assertEqual(declared_metadata, result)
 
     def test_compute_metadata_npm(self):
         """
         testing only computation of metadata with hard_mapping_npm
         """
         # given
         content = b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.2",
                 "description": "Simple package.json test for indexer",
                   "repository": {
                     "type": "git",
                     "url": "https://github.com/moranegg/metadata_test"
                 },
                 "author": {
                     "email": "moranegg@example.com",
                     "name": "Morane G"
                 }
             }
         """
         declared_metadata = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-            'codemeta:name': 'test_metadata',
-            'codemeta:version': '0.0.2',
-            'codemeta:description': 'Simple package.json test for indexer',
-            'codemeta:codeRepository':
+            'type': 'SoftwareSourceCode',
+            'name': 'test_metadata',
+            'version': '0.0.2',
+            'description': 'Simple package.json test for indexer',
+            'schema:codeRepository':
                 'git+https://github.com/moranegg/metadata_test',
-            'codemeta:author': {
-                'type': 'codemeta:Person',
-                'codemeta:name': 'Morane G',
-                'codemeta:email': 'moranegg@example.com',
+            'schema:author': {
+                'type': 'Person',
+                'name': 'Morane G',
+                'email': 'moranegg@example.com',
             },
         }
 
         # when
         result = MAPPINGS["NpmMapping"].translate(content)
         # then
         self.assertEqual(declared_metadata, result)
 
     def test_extract_minimal_metadata_dict(self):
         """
         Test the creation of a coherent minimal metadata set
         """
         # given
         metadata_list = [{
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-            'codemeta:name': 'test_1',
-            'codemeta:version': '0.0.2',
-            'codemeta:description': 'Simple package.json test for indexer',
-            'codemeta:codeRepository':
+            'name': 'test_1',
+            'version': '0.0.2',
+            'description': 'Simple package.json test for indexer',
+            'schema:codeRepository':
                 'git+https://github.com/moranegg/metadata_test',
         }, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-            'codemeta:name': 'test_0_1',
-            'codemeta:version': '0.0.2',
-            'codemeta:description': 'Simple package.json test for indexer',
-            'codemeta:codeRepository':
+            'name': 'test_0_1',
+            'version': '0.0.2',
+            'description': 'Simple package.json test for indexer',
+            'schema:codeRepository':
                 'git+https://github.com/moranegg/metadata_test'
         }, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-            'codemeta:name': 'test_metadata',
-            'codemeta:version': '0.0.2',
-            'codemeta:author': 'moranegg',
+            'name': 'test_metadata',
+            'version': '0.0.2',
+            'schema:author': 'moranegg',
         }]
 
         # when
         results = extract_minimal_metadata_dict(metadata_list)
 
         # then
         expected_results = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-            "codemeta:version": '0.0.2',
-            "codemeta:description": 'Simple package.json test for indexer',
-            "codemeta:name": ['test_1', 'test_0_1', 'test_metadata'],
-            "codemeta:author": 'moranegg',
-            "codemeta:codeRepository":
+            "version": '0.0.2',
+            "description": 'Simple package.json test for indexer',
+            "name": ['test_1', 'test_0_1', 'test_metadata'],
+            "schema:author": 'moranegg',
+            "schema:codeRepository":
                 'git+https://github.com/moranegg/metadata_test',
         }
         self.assertEqual(expected_results, results)
 
     def test_index_content_metadata_npm(self):
         """
         testing NPM with package.json
         - one sha1 uses a file that can't be translated to metadata and
           should return None in the translated metadata
         """
         # given
         sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5',
                  'd4c647f0fc257591cc9ba1722484229780d1c607',
                  '02fb2c89e14f7fab46701478c83779c7beb7b069']
         # this metadata indexer computes only metadata for package.json
         # in npm context with a hard mapping
         metadata_indexer = ContentMetadataTestIndexer(
             tool=self.content_tool, config={})
 
         # when
         metadata_indexer.run(sha1s, policy_update='ignore-dups')
         results = metadata_indexer.idx_storage.added_data
 
         expected_results = [('content_metadata', False, [{
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-                'codemeta:codeRepository':
+                'type': 'SoftwareSourceCode',
+                'schema:codeRepository':
                     'git+https://github.com/moranegg/metadata_test',
-                'codemeta:description': 'Simple package.json test for indexer',
-                'codemeta:name': 'test_metadata',
-                'codemeta:version': '0.0.1'
+                'description': 'Simple package.json test for indexer',
+                'name': 'test_metadata',
+                'version': '0.0.1'
             },
             'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'
             }, {
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+                'type': 'SoftwareSourceCode',
                 'codemeta:issueTracker':
                     'https://github.com/npm/npm/issues',
-                'codemeta:author': {
-                    'type': 'codemeta:Person',
-                    'codemeta:name': 'Isaac Z. Schlueter',
-                    'codemeta:email': 'i@izs.me',
-                    'codemeta:url': 'http://blog.izs.me',
+                'schema:author': {
+                    'type': 'Person',
+                    'name': 'Isaac Z. Schlueter',
+                    'email': 'i@izs.me',
+                    'schema:url': 'http://blog.izs.me',
                 },
-                'codemeta:codeRepository':
+                'schema:codeRepository':
                     'git+https://github.com/npm/npm',
-                'codemeta:description': 'a package manager for JavaScript',
-                'codemeta:license': 'Artistic-2.0',
-                'codemeta:version': '5.0.3',
-                'codemeta:name': 'npm',
-                'codemeta:keywords': [
+                'description': 'a package manager for JavaScript',
+                'schema:license': 'Artistic-2.0',
+                'version': '5.0.3',
+                'name': 'npm',
+                'keywords': [
                     'install',
                     'modules',
                     'package manager',
                     'package.json'
                 ],
-                'codemeta:url': 'https://docs.npmjs.com/'
+                'schema:url': 'https://docs.npmjs.com/'
             },
             'id': 'd4c647f0fc257591cc9ba1722484229780d1c607'
             }, {
             'indexer_configuration_id': 30,
             'translated_metadata': None,
             'id': '02fb2c89e14f7fab46701478c83779c7beb7b069'
         }])]
 
         # The assertion below returns False sometimes because of nested lists
         self.assertEqual(expected_results, results)
 
     def test_detect_metadata_package_json(self):
         # given
         df = [{
                 'sha1_git': b'abc',
                 'name': b'index.js',
                 'target': b'abc',
                 'length': 897,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'dir_a',
                 'sha1': b'bcd'
             },
             {
                 'sha1_git': b'aab',
                 'name': b'package.json',
                 'target': b'aab',
                 'length': 712,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'dir_a',
                 'sha1': b'cde'
         }]
         # when
         results = detect_metadata(df)
 
         expected_results = {
             'NpmMapping': [
                 b'cde'
             ]
         }
         # then
         self.assertEqual(expected_results, results)
 
+    def test_compute_metadata_valid_codemeta(self):
+        # given: a JSON document declaring the CodeMeta 2.0 @context
+        raw_content = (
+            b"""{
+            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+            "@type": "SoftwareSourceCode",
+            "identifier": "CodeMeta",
+            "description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.",
+            "name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD",
+            "codeRepository": "https://github.com/codemeta/codemeta",
+            "issueTracker": "https://github.com/codemeta/codemeta/issues",
+            "license": "https://spdx.org/licenses/Apache-2.0",
+            "version": "2.0",
+            "author": [
+              {
+                "@type": "Person",
+                "givenName": "Carl",
+                "familyName": "Boettiger",
+                "email": "cboettig@gmail.com",
+                "@id": "http://orcid.org/0000-0002-1642-628X"
+              },
+              {
+                "@type": "Person",
+                "givenName": "Matthew B.",
+                "familyName": "Jones",
+                "email": "jones@nceas.ucsb.edu",
+                "@id": "http://orcid.org/0000-0003-0077-4738"
+              }
+            ],
+            "maintainer": {
+              "@type": "Person",
+              "givenName": "Carl",
+              "familyName": "Boettiger",
+              "email": "cboettig@gmail.com",
+              "@id": "http://orcid.org/0000-0002-1642-628X"
+            },
+            "contIntegration": "https://travis-ci.org/codemeta/codemeta",
+            "developmentStatus": "active",
+            "downloadUrl": "https://github.com/codemeta/codemeta/archive/2.0.zip",
+            "funder": {
+                "@id": "https://doi.org/10.13039/100000001",
+                "@type": "Organization",
+                "name": "National Science Foundation"
+            },
+            "funding":"1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software",
+            "keywords": [
+              "metadata",
+              "software"
+            ],
+            "dateCreated":"2017-06-05",
+            "datePublished":"2017-06-05",
+            "programmingLanguage": "JSON-LD"
+          }""")  # noqa
+        # expected: identical, except '@type'/'@id' become 'type'/'id'
+        expected_result = {
+            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+            "type": "SoftwareSourceCode",
+            "identifier": "CodeMeta",
+            "description":
+                "CodeMeta is a concept vocabulary that can "
+                "be used to standardize the exchange of software metadata "
+                "across repositories and organizations.",
+            "name":
+                "CodeMeta: Minimal metadata schemas for science "
+                "software and code, in JSON-LD",
+            "codeRepository": "https://github.com/codemeta/codemeta",
+            "issueTracker": "https://github.com/codemeta/codemeta/issues",
+            "license": "https://spdx.org/licenses/Apache-2.0",
+            "version": "2.0",
+            "author": [
+              {
+                "type": "Person",
+                "givenName": "Carl",
+                "familyName": "Boettiger",
+                "email": "cboettig@gmail.com",
+                "id": "http://orcid.org/0000-0002-1642-628X"
+              },
+              {
+                "type": "Person",
+                "givenName": "Matthew B.",
+                "familyName": "Jones",
+                "email": "jones@nceas.ucsb.edu",
+                "id": "http://orcid.org/0000-0003-0077-4738"
+              }
+            ],
+            "maintainer": {
+              "type": "Person",
+              "givenName": "Carl",
+              "familyName": "Boettiger",
+              "email": "cboettig@gmail.com",
+              "id": "http://orcid.org/0000-0002-1642-628X"
+            },
+            "contIntegration": "https://travis-ci.org/codemeta/codemeta",
+            "developmentStatus": "active",
+            "downloadUrl":
+                "https://github.com/codemeta/codemeta/archive/2.0.zip",
+            "funder": {
+                "id": "https://doi.org/10.13039/100000001",
+                "type": "Organization",
+                "name": "National Science Foundation"
+            },
+            "funding": "1549758; Codemeta: A Rosetta Stone for Metadata "
+                "in Scientific Software",
+            "keywords": [
+              "metadata",
+              "software"
+            ],
+            "dateCreated": "2017-06-05",
+            "datePublished": "2017-06-05",
+            "programmingLanguage": "JSON-LD"
+          }
+        result = MAPPINGS["CodemetaMapping"].translate(raw_content)
+        self.assertEqual(result, expected_result)
+
+    def test_compute_metadata_maven(self):  # Maven <project> XML -> CodeMeta
+        raw_content = b"""
+        <project>
+          <name>Maven Default Project</name>
+          <modelVersion>4.0.0</modelVersion>
+          <groupId>com.mycompany.app</groupId>
+          <artifactId>my-app</artifactId>
+          <version>1.2.3</version>
+          <repositories>
+            <repository>
+              <id>central</id>
+              <name>Maven Repository Switchboard</name>
+              <layout>default</layout>
+              <url>http://repo1.maven.org/maven2</url>
+              <snapshots>
+                <enabled>false</enabled>
+              </snapshots>
+            </repository>
+          </repositories>
+        </project>"""
+        result = MAPPINGS["MavenMapping"].translate(raw_content)
+        self.assertEqual(result, {
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            'type': 'SoftwareSourceCode',
+            'name': 'Maven Default Project',
+            'schema:identifier': 'com.mycompany.app',
+            'version': '1.2.3',
+            'schema:codeRepository':
+                'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
+            })
+
     def test_revision_metadata_indexer(self):
         metadata_indexer = RevisionMetadataTestIndexer()
 
         sha1_gits = [
             b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
         ]
         metadata_indexer.run(sha1_gits, 'update-dups')
 
         results = metadata_indexer.idx_storage.added_data
 
         expected_results = [('revision_metadata', True, [{
             'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-                'codemeta:url':
+                'url':
                     'https://github.com/librariesio/yarn-parser#readme',
-                'codemeta:codeRepository':
+                'schema:codeRepository':
                     'git+https://github.com/librariesio/yarn-parser.git',
-                'codemeta:author': 'Andrew Nesbitt',
-                'codemeta:license': 'AGPL-3.0',
-                'codemeta:version': '1.0.0',
-                'codemeta:description':
+                'schema:author': 'Andrew Nesbitt',
+                'license': 'AGPL-3.0',
+                'version': '1.0.0',
+                'description':
                     'Tiny web service for parsing yarn.lock files',
                 'codemeta:issueTracker':
                     'https://github.com/librariesio/yarn-parser/issues',
-                'codemeta:name': 'yarn-parser',
-                'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'],
+                'name': 'yarn-parser',
+                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
             },
             'indexer_configuration_id': 7
         }])]
         # then
         self.assertEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
index b5401d1..375c42e 100644
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -1,127 +1,127 @@
 # Copyright (C) 2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import time
 import logging
 import unittest
 from celery import task
 
 from swh.indexer.metadata import OriginMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
 from swh.indexer.tests.test_utils import MockIndexerStorage
 from swh.indexer.tests.test_origin_head import OriginHeadTestIndexer
 from swh.indexer.tests.test_metadata import RevisionMetadataTestIndexer
 
 from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture
 
 
 class OriginMetadataTestIndexer(OriginMetadataIndexer):
     def prepare(self):
         self.config = {
             'storage': {
                 'cls': 'remote',
                 'args': {
                     'url': 'http://localhost:9999',
                 }
             },
             'tools': {
                 'name': 'origin-metadata',
                 'version': '0.0.1',
                 'configuration': {}
             }
         }
         self.storage = MockStorage()
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         self.tools = self.register_tools(self.config['tools'])
         self.tool = self.tools[0]
         self.results = []
 
 
 @task
 def revision_metadata_test_task(*args, **kwargs):
     indexer = RevisionMetadataTestIndexer()
     indexer.run(*args, **kwargs)
     return indexer.results
 
 
 @task
 def origin_intrinsic_metadata_test_task(*args, **kwargs):
     indexer = OriginMetadataTestIndexer()
     indexer.run(*args, **kwargs)
     return indexer.results
 
 
 class OriginHeadTestIndexer(OriginHeadTestIndexer):
     revision_metadata_task = 'revision_metadata_test_task'
     origin_intrinsic_metadata_task = 'origin_intrinsic_metadata_test_task'
 
 
 class TestOriginMetadata(SchedulerTestFixture, unittest.TestCase):
     def setUp(self):
         super().setUp()
         self.maxDiff = None
         MockIndexerStorage.added_data = []
         self.add_scheduler_task_type(
             'revision_metadata_test_task',
             'swh.indexer.tests.test_origin_metadata.'
             'revision_metadata_test_task')
         self.add_scheduler_task_type(
             'origin_intrinsic_metadata_test_task',
             'swh.indexer.tests.test_origin_metadata.'
             'origin_intrinsic_metadata_test_task')
         RevisionMetadataTestIndexer.scheduler = self.scheduler
 
     def tearDown(self):
         del RevisionMetadataTestIndexer.scheduler
         super().tearDown()
 
     def test_pipeline(self):
         indexer = OriginHeadTestIndexer()
         indexer.scheduler = self.scheduler
         indexer.run(
                 ["git+https://github.com/librariesio/yarn-parser"],
                 policy_update='update-dups',
                 parse_ids=True)
 
         self.run_ready_tasks()  # Run the first task
         time.sleep(0.1)  # Give it time to complete and schedule the 2nd one
         self.run_ready_tasks()  # Run the second task
 
         metadata = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
-            'codemeta:url':
+            'url':
                 'https://github.com/librariesio/yarn-parser#readme',
-            'codemeta:codeRepository':
+            'schema:codeRepository':
                 'git+https://github.com/librariesio/yarn-parser.git',
-            'codemeta:author': 'Andrew Nesbitt',
-            'codemeta:license': 'AGPL-3.0',
-            'codemeta:version': '1.0.0',
-            'codemeta:description':
+            'schema:author': 'Andrew Nesbitt',
+            'license': 'AGPL-3.0',
+            'version': '1.0.0',
+            'description':
                 'Tiny web service for parsing yarn.lock files',
             'codemeta:issueTracker':
                 'https://github.com/librariesio/yarn-parser/issues',
-            'codemeta:name': 'yarn-parser',
-            'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'],
+            'name': 'yarn-parser',
+            'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
         }
         rev_metadata = {
             'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'translated_metadata': metadata,
             'indexer_configuration_id': 7,
         }
         origin_metadata = {
             'origin_id': 54974445,
             'from_revision': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'metadata': metadata,
             'indexer_configuration_id': 7,
         }
         expected_results = [
                 ('origin_intrinsic_metadata', True, [origin_metadata]),
                 ('revision_metadata', True, [rev_metadata])]
 
         results = list(indexer.idx_storage.added_data)
         self.assertCountEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
index 3be03f7..8dc958c 100644
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -1,399 +1,400 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 from swh.objstorage.exc import ObjNotFoundError
 
 ORIGINS = [
         {
             'id': 52189575,
             'lister': None,
             'project': None,
             'type': 'git',
             'url': 'https://github.com/SoftwareHeritage/swh-storage'},
         {
             'id': 4423668,
             'lister': None,
             'project': None,
             'type': 'ftp',
             'url': 'rsync://ftp.gnu.org/gnu/3dldf'},
         {
             'id': 77775770,
             'lister': None,
             'project': None,
             'type': 'deposit',
             'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'},
         {
             'id': 85072327,
             'lister': None,
             'project': None,
             'type': 'pypi',
             'url': 'https://pypi.org/project/limnoria/'},
         {
             'id': 49908349,
             'lister': None,
             'project': None,
             'type': 'svn',
             'url': 'http://0-512-md.googlecode.com/svn/'},
         {
             'id': 54974445,
             'lister': None,
             'project': None,
             'type': 'git',
             'url': 'https://github.com/librariesio/yarn-parser'},
         ]
 
 SNAPSHOTS = {
         52189575: {
             'branches': {
                 b'refs/heads/add-revision-origin-cache': {
                     'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0'
                               b's\xe7/\xe9l\x1e',
                     'target_type': 'revision'},
                 b'HEAD': {
                     'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}'
                               b'\xac\xefrm',
                     'target_type': 'revision'},
                 b'refs/tags/v0.0.103': {
                     'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+'
                               b'\x0f\xdd',
                     'target_type': 'release'},
                 }},
         4423668: {
             'branches': {
                 b'3DLDF-1.1.4.tar.gz': {
                     'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc'
                               b'"G\x99\x11',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.2.tar.gz': {
                     'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e='
                               b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.3-examples.tar.gz': {
                     'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97'
                               b'\xfe\xadZ\x80\x80\xc1\x83\xff',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.3.tar.gz': {
                     'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
                               b'\xcc\x1a\xb4`\x8c\x8by',
                     'target_type': 'revision'},
                 b'3DLDF-2.0.tar.gz': {
                     'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
                               b'\xd3\xd1m',
                     b'target_type': 'revision'}
                 }},
         77775770: {
             'branches': {
                 b'master': {
                     'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
                               b'\xa6\xe9\x99\xb1\x9e]q\xeb',
                     'target_type': 'revision'}
             },
             'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV"
                   b"\x1d\r "},
         85072327: {
             'branches': {
                 b'HEAD': {
                     'target': b'releases/2018.09.09',
                     'target_type': 'alias'},
                 b'releases/2018.09.01': {
                     'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d'
                               b'\xbb\xdfF\xfdw\xcf',
                     'target_type': 'revision'},
                 b'releases/2018.09.09': {
                     'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
                               b'A\x10\x9d\xc5\xfa2\xf8t',
                     'target_type': 'revision'}},
             'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay'
                   b'\x12\x9e\xd6\xb3'},
         49908349: {
                 'branches': {
                     b'master': {
                         'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
                                   b'\xc9\xad#.\x1bw=\x18',
                         'target_type': 'revision'}},
                 'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7'
                       b'\x05\xea\xb8\x1f\xc4H\xf4s'},
         54974445: {
                 'branches': {
                     b'HEAD': {
                         'target': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
                         'target_type': 'revision'}}}
         }
 
 
 class MockObjStorage:
     """Mock an swh-objstorage objstorage with predefined contents.
 
     """
     data = {}
 
     def __init__(self):
         self.data = {
             '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text',
             '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
             '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
             '02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
             import unittest
             import logging
             from swh.indexer.mimetype import ContentMimetypeIndexer
             from swh.indexer.tests.test_utils import MockObjStorage
 
             class MockStorage():
                 def content_mimetype_add(self, mimetypes):
                     self.state = mimetypes
                     self.conflict_update = conflict_update
 
                 def indexer_configuration_add(self, tools):
                     return [{
                         'id': 10,
                     }]
             """,
             '103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
                 #ifndef __AVL__
                 #define __AVL__
 
                 typedef struct _avl_tree avl_tree;
 
                 typedef struct _data_t {
                   int content;
                 } data_t;
             """,
             '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
             (should 'pygments (recognize 'lisp 'easily))
 
             """,
             '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.1",
                 "description": "Simple package.json test for indexer",
                 "repository": {
                   "type": "git",
                   "url": "https://github.com/moranegg/metadata_test"
               }
             }
             """,
             'd4c647f0fc257591cc9ba1722484229780d1c607': b"""
             {
               "version": "5.0.3",
               "name": "npm",
               "description": "a package manager for JavaScript",
               "keywords": [
                 "install",
                 "modules",
                 "package manager",
                 "package.json"
               ],
               "preferGlobal": true,
               "config": {
                 "publishtest": false
               },
               "homepage": "https://docs.npmjs.com/",
               "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
               "repository": {
                 "type": "git",
                 "url": "https://github.com/npm/npm"
               },
               "bugs": {
                 "url": "https://github.com/npm/npm/issues"
               },
               "dependencies": {
                 "JSONStream": "~1.3.1",
                 "abbrev": "~1.1.0",
                 "ansi-regex": "~2.1.1",
                 "ansicolors": "~0.3.2",
                 "ansistyles": "~0.1.3"
               },
               "devDependencies": {
                 "tacks": "~1.2.6",
                 "tap": "~10.3.2"
               },
               "license": "Artistic-2.0"
             }
 
             """,
             'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b"""
             """,
             'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'',
         }
 
     def __iter__(self):
         yield from self.data.keys()
 
     def __contains__(self, sha1):
         return self.data.get(sha1) is not None
 
     def get(self, sha1):
         raw_content = self.data.get(sha1)
         if raw_content is None:
             raise ObjNotFoundError(sha1)
         return raw_content
 
 
 class MockIndexerStorage():
     """Mock an swh-indexer storage.
 
     """
     added_data = []
 
     def indexer_configuration_add(self, tools):
         tool = tools[0]
         if tool['tool_name'] == 'swh-metadata-translator':
             return [{
                 'id': 30,
                 'tool_name': 'swh-metadata-translator',
                 'tool_version': '0.0.1',
                 'tool_configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                 },
             }]
         elif tool['tool_name'] == 'swh-metadata-detector':
             return [{
                 'id': 7,
                 'tool_name': 'swh-metadata-detector',
                 'tool_version': '0.0.1',
                 'tool_configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                 },
             }]
         elif tool['tool_name'] == 'origin-metadata':
             return [{
                 'id': 8,
                 'tool_name': 'origin-metadata',
                 'tool_version': '0.0.1',
                 'tool_configuration': {},
             }]
         else:
             assert False, 'Unknown tool {tool_name}'.format(**tool)
 
     def content_metadata_missing(self, sha1s):
         yield from []
 
     def content_metadata_add(self, metadata, conflict_update=None):
         self.added_data.append(
                 ('content_metadata', conflict_update, metadata))
 
     def revision_metadata_add(self, metadata, conflict_update=None):
         self.added_data.append(
                 ('revision_metadata', conflict_update, metadata))
 
     def origin_intrinsic_metadata_add(self, metadata, conflict_update=None):
         self.added_data.append(
                 ('origin_intrinsic_metadata', conflict_update, metadata))
 
     def content_metadata_get(self, sha1s):
         return [{
             'tool': {
                 'configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                     },
                 'version': '0.0.1',
                 'id': 6,
                 'name': 'swh-metadata-translator'
             },
             'id': b'cde',
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+                'type': 'SoftwareSourceCode',
                 'codemeta:issueTracker':
                     'https://github.com/librariesio/yarn-parser/issues',
-                'codemeta:version': '1.0.0',
-                'codemeta:name': 'yarn-parser',
-                'codemeta:author': 'Andrew Nesbitt',
-                'codemeta:url':
+                'version': '1.0.0',
+                'name': 'yarn-parser',
+                'schema:author': 'Andrew Nesbitt',
+                'url':
                     'https://github.com/librariesio/yarn-parser#readme',
-                'codemeta:processorRequirements': {'node': '7.5'},
-                'codemeta:license': 'AGPL-3.0',
-                'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'],
-                'codemeta:codeRepository':
+                'processorRequirements': {'node': '7.5'},
+                'license': 'AGPL-3.0',
+                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
+                'schema:codeRepository':
                     'git+https://github.com/librariesio/yarn-parser.git',
-                'codemeta:description':
+                'description':
                     'Tiny web service for parsing yarn.lock files',
                 }
         }]
 
 
 class MockStorage():
     """Mock a real swh-storage storage to simplify reading indexers'
     outputs.
 
     """
     def origin_get(self, id_):
         for origin in ORIGINS:
             for (k, v) in id_.items():
                 if origin[k] != v:
                     break
             else:
                 # This block is run iff we didn't break, ie. if all supplied
                 # parts of the id are set to the expected value.
                 return origin
         assert False, id_
 
     def snapshot_get_latest(self, origin_id):
         if origin_id in SNAPSHOTS:
             return SNAPSHOTS[origin_id]
         else:
             assert False, origin_id
 
     def revision_get(self, revisions):
         return [{
             'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'committer': {
                 'id': 26,
                 'name': b'Andrew Nesbitt',
                 'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>',
                 'email': b'andrewnez@gmail.com'
             },
             'synthetic': False,
             'date': {
                 'negative_utc': False,
                 'timestamp': {
                     'seconds': 1487596456,
                     'microseconds': 0
                 },
                 'offset': 0
             },
             'directory': b'10'
         }]
 
     def directory_ls(self, directory, recursive=False, cur=None):
         # with directory: b'\x9d',
         return [{
                 'sha1_git': b'abc',
                 'name': b'index.js',
                 'target': b'abc',
                 'length': 897,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'10',
                 'sha1': b'bcd'
                 },
                 {
                 'sha1_git': b'aab',
                 'name': b'package.json',
                 'target': b'aab',
                 'length': 712,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'10',
                 'sha1': b'cde'
                 },
                 {
                 'dir_id': b'10',
                 'target': b'11',
                 'type': 'dir',
                 'length': None,
                 'name': b'.github',
                 'sha1': None,
                 'perms': 16384,
                 'sha1_git': None,
                 'status': None,
                 'sha256': None
                 }]