diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
index 267b7bf..e06744b 100644
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -1,95 +1,99 @@
 # Copyright (C) 2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import csv
 import json
 import os.path
 
 import swh.indexer
 from pyld import jsonld
 
 _DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data')
 
 CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv')
 
 CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, 'codemeta', 'codemeta.jsonld')
 
 
 with open(CODEMETA_CONTEXT_PATH) as fd:
     CODEMETA_CONTEXT = json.load(fd)
 
 CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0'
 CODEMETA_URI = 'https://codemeta.github.io/terms/'
+SCHEMA_URI = 'http://schema.org/'
 
 
-# CodeMeta properties that we cannot properly represent.
 PROPERTY_BLACKLIST = {
-    'https://codemeta.github.io/terms/softwareRequirements',
-    'https://codemeta.github.io/terms/softwareSuggestions',
+    # CodeMeta properties that we cannot properly represent.
+    CODEMETA_URI + 'softwareRequirements',
+    CODEMETA_URI + 'softwareSuggestions',
+
+    # Duplicate of 'author'
+    CODEMETA_URI + 'creator',
     }
 
 
 def _read_crosstable(fd):
     reader = csv.reader(fd)
     try:
         header = next(reader)
     except StopIteration:
         raise ValueError('empty file')
 
     data_sources = set(header) - {'Parent Type', 'Property',
                                   'Type', 'Description'}
     assert 'codemeta-V1' in data_sources
 
     codemeta_translation = {data_source: {} for data_source in data_sources}
 
     for line in reader:  # For each canonical name
         canonical_name = CODEMETA_URI + dict(zip(header, line))['Property']
         if canonical_name in PROPERTY_BLACKLIST:
             continue
         for (col, value) in zip(header, line):  # For each cell in the row
             if col in data_sources:
                 # If that's not the parentType/property/type/description
                 for local_name in value.split('/'):
                     # For each of the data source's properties that maps
                     # to this canonical name
                     if local_name.strip():
                         codemeta_translation[col][local_name.strip()] = \
                                 canonical_name
 
     return codemeta_translation
 
 
 with open(CROSSWALK_TABLE_PATH) as fd:
     CROSSWALK_TABLE = _read_crosstable(fd)
 
 
 def _document_loader(url):
     """Document loader for pyld.
 
     Reads the local codemeta.jsonld file instead of fetching it
     from the Internet every single time."""
     if url == CODEMETA_CONTEXT_URL:
         return {
                 'contextUrl': None,
                 'documentUrl': url,
                 'document': CODEMETA_CONTEXT,
                 }
     elif url == CODEMETA_URI:
         raise Exception('{} is CodeMeta\'s URI, use {} as context url'.format(
             CODEMETA_URI, CODEMETA_CONTEXT_URL))
     else:
         raise Exception(url)
 
 
 def compact(doc):
     """Same as `pyld.jsonld.compact`, but in the context of CodeMeta."""
     return jsonld.compact(doc, CODEMETA_CONTEXT_URL,
                           options={'documentLoader': _document_loader})
 
 
 def expand(doc):
     """Same as `pyld.jsonld.expand`, but in the context of CodeMeta."""
     return jsonld.expand(doc,
                          options={'documentLoader': _document_loader})
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
index bf704c8..c2cd7eb 100644
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -1,177 +1,230 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import re
 import abc
 import json
 import logging
 
-from swh.indexer.codemeta import CROSSWALK_TABLE, compact
+from swh.indexer.codemeta import CROSSWALK_TABLE, CODEMETA_URI, compact
 
 
 MAPPINGS = {}
 
 
 def register_mapping(cls):
     MAPPINGS[cls.__name__] = cls()
     return cls
 
 
 class BaseMapping(metaclass=abc.ABCMeta):
     """Base class for mappings to inherit from
 
     To implement a new mapping:
 
     - inherit this class
     - override translate function
     """
     def __init__(self):
         self.log = logging.getLogger('%s.%s' % (
             self.__class__.__module__,
             self.__class__.__name__))
 
     @abc.abstractmethod
     def detect_metadata_files(self, files):
         """
         Detects files potentially containing metadata
         Args:
             - file_entries (list): list of files
 
         Returns:
             - empty list if nothing was found
             - list of sha1 otherwise
         """
         pass
 
     @abc.abstractmethod
     def translate(self, file_content):
         pass
 
     def normalize_translation(self, metadata):
         return compact(metadata)
 
 
 class DictMapping(BaseMapping):
     """Base class for mappings that take as input a file that is mostly
     a key-value store (eg. a shallow JSON dict)."""
 
     @property
     @abc.abstractmethod
     def mapping(self):
         """A translation dict to map dict keys into a canonical name."""
         pass
 
     def translate_dict(self, content_dict):
         """
         Translates content  by parsing content from a dict object
         and translating with the appropriate mapping
 
         Args:
             content_dict (dict)
 
         Returns:
             dict: translated metadata in json-friendly form needed for
                   the indexer
 
         """
         translated_metadata = {}
         for k, v in content_dict.items():
             # First, check if there is a specific translation
             # method for this key
             translation_method = getattr(self, 'translate_' + k, None)
             if translation_method:
                 translation_method(translated_metadata, v)
             elif k in self.mapping:
                 # if there is no method, but the key is known from the
                 # crosswalk table
 
                 # if there is a normalization method, use it on the value
                 normalization_method = getattr(self, 'normalize_' + k, None)
                 if normalization_method:
                     v = normalization_method(v)
 
                 # set the translation metadata with the normalized value
                 translated_metadata[self.mapping[k]] = v
         return self.normalize_translation(translated_metadata)
 
 
 class JsonMapping(DictMapping):
     """Base class for all mappings that use a JSON file as input."""
 
     @property
     @abc.abstractmethod
     def filename(self):
         """The .json file to extract metadata from."""
         pass
 
     def detect_metadata_files(self, file_entries):
         for entry in file_entries:
             if entry['name'] == self.filename:
                 return [entry['sha1']]
         return []
 
     def translate(self, raw_content):
         """
         Translates content by parsing content from a bytestring containing
         json data and translating with the appropriate mapping
 
         Args:
             raw_content: bytes
 
         Returns:
             dict: translated metadata in json-friendly form needed for
                   the indexer
 
         """
         try:
             raw_content = raw_content.decode()
         except UnicodeDecodeError:
             self.log.warning('Error unidecoding %r', raw_content)
             return
         try:
             content_dict = json.loads(raw_content)
         except json.JSONDecodeError:
             self.log.warning('Error unjsoning %r' % raw_content)
             return
         return self.translate_dict(content_dict)
 
 
 @register_mapping
 class NpmMapping(JsonMapping):
     """
     dedicated class for NPM (package.json) mapping and translation
     """
     mapping = CROSSWALK_TABLE['NodeJS']
     filename = b'package.json'
 
+    _schema_shortcuts = {
+            'github': 'https://github.com/',
+            'gist': 'https://gist.github.com/',
+            'bitbucket': 'https://bitbucket.org/',
+            'gitlab': 'https://gitlab.com/',
+            }
+
     def normalize_repository(self, d):
-        return '{type}+{url}'.format(**d)
+        """Normalize npm's `repository` field to a URL string, or None.
+
+        Handles dicts, full URLs and the `host:path` / `user/repo` shorthands;
+        see https://docs.npmjs.com/files/package.json#repository"""
+        if isinstance(d, dict):
+            if not d.get('url'):
+                return None  # no URL given; also avoids a KeyError in format()
+            return '{type}+{url}'.format(**d) if d.get('type') else d['url']
+        if not isinstance(d, str):
+            return None
+        if '://' in d:
+            return d
+        (scheme, sep, rest) = d.partition(':')
+        if not sep:
+            return self._schema_shortcuts['github'] + d
+        prefix = self._schema_shortcuts.get(scheme)
+        return prefix + rest if prefix else None
 
     def normalize_bugs(self, d):
         return '{url}'.format(**d)
 
+    _parse_author = re.compile(r'^ *'
+                               r'(?P<name>.*?)'
+                               r'( +<(?P<email>.*)>)?'
+                               r'( +\((?P<url>.*)\))?'
+                               r' *$')
+
+    def normalize_author(self, d):
+        """Translate npm's `author` field into a CodeMeta Person dict.
+
+        Accepts the dict form or the 'Name <email> (url)' string form;
+        returns None for any other type or for an unparsable string.
+        https://docs.npmjs.com/files/package.json
+        (section: people-fields-author-contributors)"""
+        if isinstance(d, dict):
+            name, email, url = d.get('name'), d.get('email'), d.get('url')
+        elif isinstance(d, str):
+            match = self._parse_author.match(d)
+            if match is None:
+                # '.' does not match a newline, so multi-line strings fail.
+                return None
+            name, email, url = match.group('name', 'email', 'url')
+        else:
+            return None
+        author = {'@type': CODEMETA_URI+'Person'}
+        for (key, value) in (('name', name), ('email', email), ('url', url)):
+            if value:
+                author[CODEMETA_URI+key] = value
+        return author
+
 
 @register_mapping
 class CodemetaMapping(JsonMapping):
     """
     dedicated class for CodeMeta (codemeta.json) mapping and translation
     """
     mapping = CROSSWALK_TABLE['codemeta-V1']
     filename = b'codemeta.json'
 
 
 def main():
     raw_content = """{"name": "test_name", "unknown_term": "ut"}"""
     raw_content1 = b"""{"name": "test_name",
                         "unknown_term": "ut",
                         "prerequisites" :"packageXYZ"}"""
     result = MAPPINGS["NpmMapping"].translate(raw_content)
     result1 = MAPPINGS["MavenMapping"].translate(raw_content1)
 
     print(result)
     print(result1)
 
 
 if __name__ == "__main__":
     main()
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index e6e5734..7e78f92 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,319 +1,332 @@
 # Copyright (C) 2017-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 import logging
 
 from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.indexer.metadata import ContentMetadataIndexer
 from swh.indexer.metadata import RevisionMetadataIndexer
 from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
 from swh.indexer.tests.test_utils import MockIndexerStorage
 
 
 class ContentMetadataTestIndexer(ContentMetadataIndexer):
     """Specific Metadata whose configuration is enough to satisfy the
        indexing tests.
     """
     def prepare(self):
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         self.tools = self.register_tools(self.config['tools'])
         self.tool = self.tools[0]
         self.results = []
 
 
 class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
     """Specific indexer whose configuration is enough to satisfy the
        indexing tests.
     """
 
     ContentMetadataIndexer = ContentMetadataTestIndexer
 
     def prepare(self):
         self.config = {
             'storage': {
                 'cls': 'remote',
                 'args': {
                     'url': 'http://localhost:9999',
                 }
             },
             'tools': {
                 'name': 'swh-metadata-detector',
                 'version': '0.0.2',
                 'configuration': {
                     'type': 'local',
                     'context': 'NpmMapping'
                 }
             }
         }
         self.storage = MockStorage()
         self.idx_storage = MockIndexerStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.destination_task = None
         self.tools = self.register_tools(self.config['tools'])
         self.tool = self.tools[0]
         self.results = []
 
 
 class Metadata(unittest.TestCase):
     """
     Tests metadata_mock_tool tool for Metadata detection
     """
     def setUp(self):
         """
         shows the entire diff in the results
         """
         self.maxDiff = None
         self.content_tool = {
             'name': 'swh-metadata-translator',
             'version': '0.0.2',
             'configuration': {
                 'type': 'local',
                 'context': 'NpmMapping'
             }
         }
         MockIndexerStorage.added_data = []
 
     def test_crosstable(self):
         self.assertEqual(CROSSWALK_TABLE['NodeJS'], {
             'repository': 'https://codemeta.github.io/terms/codeRepository',
             'os': 'https://codemeta.github.io/terms/operatingSystem',
             'cpu': 'https://codemeta.github.io/terms/processorRequirements',
             'engines':
                 'https://codemeta.github.io/terms/processorRequirements',
-            'author': 'https://codemeta.github.io/terms/creator',
+            'author': 'https://codemeta.github.io/terms/author',
             'author.email': 'https://codemeta.github.io/terms/email',
             'author.name': 'https://codemeta.github.io/terms/name',
             'contributor': 'https://codemeta.github.io/terms/contributor',
             'keywords': 'https://codemeta.github.io/terms/keywords',
             'license': 'https://codemeta.github.io/terms/license',
             'version': 'https://codemeta.github.io/terms/version',
             'description': 'https://codemeta.github.io/terms/description',
             'name': 'https://codemeta.github.io/terms/name',
             'bugs': 'https://codemeta.github.io/terms/issueTracker',
             'homepage': 'https://codemeta.github.io/terms/url'
         })
 
     def test_compute_metadata_none(self):
         """
         testing content empty content is empty
         should return None
         """
         # given
         content = b""
 
         # None if no metadata was found or an error occurred
         declared_metadata = None
         # when
         result = MAPPINGS["NpmMapping"].translate(content)
         # then
         self.assertEqual(declared_metadata, result)
 
     def test_compute_metadata_npm(self):
         """
         testing only computation of metadata with hard_mapping_npm
         """
         # given
         content = b"""
             {
                 "name": "test_metadata",
                 "version": "0.0.2",
                 "description": "Simple package.json test for indexer",
                   "repository": {
                     "type": "git",
                     "url": "https://github.com/moranegg/metadata_test"
+                },
+                "author": {
+                    "email": "moranegg@example.com",
+                    "name": "Morane G"
                 }
             }
         """
         declared_metadata = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'codemeta:name': 'test_metadata',
             'codemeta:version': '0.0.2',
             'codemeta:description': 'Simple package.json test for indexer',
             'codemeta:codeRepository':
                 'git+https://github.com/moranegg/metadata_test',
+            'codemeta:author': {
+                'type': 'codemeta:Person',
+                'codemeta:name': 'Morane G',
+                'codemeta:email': 'moranegg@example.com',
+            },
         }
 
         # when
         result = MAPPINGS["NpmMapping"].translate(content)
         # then
         self.assertEqual(declared_metadata, result)
 
     def test_extract_minimal_metadata_dict(self):
         """
         Test the creation of a coherent minimal metadata set
         """
         # given
         metadata_list = [{
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'codemeta:name': 'test_1',
             'codemeta:version': '0.0.2',
             'codemeta:description': 'Simple package.json test for indexer',
             'codemeta:codeRepository':
                 'git+https://github.com/moranegg/metadata_test',
         }, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'codemeta:name': 'test_0_1',
             'codemeta:version': '0.0.2',
             'codemeta:description': 'Simple package.json test for indexer',
             'codemeta:codeRepository':
                 'git+https://github.com/moranegg/metadata_test'
         }, {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             'codemeta:name': 'test_metadata',
             'codemeta:version': '0.0.2',
             'codemeta:author': 'moranegg',
         }]
 
         # when
         results = extract_minimal_metadata_dict(metadata_list)
 
         # then
         expected_results = {
             '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
             "codemeta:version": '0.0.2',
             "codemeta:description": 'Simple package.json test for indexer',
             "codemeta:name": ['test_1', 'test_0_1', 'test_metadata'],
             "codemeta:author": 'moranegg',
             "codemeta:codeRepository":
                 'git+https://github.com/moranegg/metadata_test',
         }
         self.assertEqual(expected_results, results)
 
     def test_index_content_metadata_npm(self):
         """
         testing NPM with package.json
         - one sha1 uses a file that can't be translated to metadata and
           should return None in the translated metadata
         """
         # given
         sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5',
                  'd4c647f0fc257591cc9ba1722484229780d1c607',
                  '02fb2c89e14f7fab46701478c83779c7beb7b069']
         # this metadata indexer computes only metadata for package.json
         # in npm context with a hard mapping
         metadata_indexer = ContentMetadataTestIndexer(
             tool=self.content_tool, config={})
 
         # when
         metadata_indexer.run(sha1s, policy_update='ignore-dups')
         results = metadata_indexer.idx_storage.added_data
 
         expected_results = [('content_metadata', False, [{
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                 'codemeta:codeRepository':
                     'git+https://github.com/moranegg/metadata_test',
                 'codemeta:description': 'Simple package.json test for indexer',
                 'codemeta:name': 'test_metadata',
                 'codemeta:version': '0.0.1'
             },
             'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'
             }, {
             'indexer_configuration_id': 30,
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                 'codemeta:issueTracker':
                     'https://github.com/npm/npm/issues',
-                'codemeta:creator':
-                    'Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)',
+                'codemeta:author': {
+                    'type': 'codemeta:Person',
+                    'codemeta:name': 'Isaac Z. Schlueter',
+                    'codemeta:email': 'i@izs.me',
+                    'codemeta:url': 'http://blog.izs.me',
+                },
                 'codemeta:codeRepository':
                     'git+https://github.com/npm/npm',
                 'codemeta:description': 'a package manager for JavaScript',
                 'codemeta:license': 'Artistic-2.0',
                 'codemeta:version': '5.0.3',
                 'codemeta:name': 'npm',
                 'codemeta:keywords': [
                     'install',
                     'modules',
                     'package manager',
                     'package.json'
                 ],
                 'codemeta:url': 'https://docs.npmjs.com/'
             },
             'id': 'd4c647f0fc257591cc9ba1722484229780d1c607'
             }, {
             'indexer_configuration_id': 30,
             'translated_metadata': None,
             'id': '02fb2c89e14f7fab46701478c83779c7beb7b069'
         }])]
 
         # The assertion below returns False sometimes because of nested lists
         self.assertEqual(expected_results, results)
 
     def test_detect_metadata_package_json(self):
         # given
         df = [{
                 'sha1_git': b'abc',
                 'name': b'index.js',
                 'target': b'abc',
                 'length': 897,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'dir_a',
                 'sha1': b'bcd'
             },
             {
                 'sha1_git': b'aab',
                 'name': b'package.json',
                 'target': b'aab',
                 'length': 712,
                 'status': 'visible',
                 'type': 'file',
                 'perms': 33188,
                 'dir_id': b'dir_a',
                 'sha1': b'cde'
         }]
         # when
         results = detect_metadata(df)
 
         expected_results = {
             'NpmMapping': [
                 b'cde'
             ]
         }
         # then
         self.assertEqual(expected_results, results)
 
     def test_revision_metadata_indexer(self):
         metadata_indexer = RevisionMetadataTestIndexer()
 
         sha1_gits = [
             b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
         ]
         metadata_indexer.run(sha1_gits, 'update-dups')
 
         results = metadata_indexer.idx_storage.added_data
 
         expected_results = [('revision_metadata', True, [{
             'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                 'codemeta:url':
                     'https://github.com/librariesio/yarn-parser#readme',
                 'codemeta:codeRepository':
                     'git+https://github.com/librariesio/yarn-parser.git',
                 'codemeta:author': 'Andrew Nesbitt',
                 'codemeta:license': 'AGPL-3.0',
                 'codemeta:version': '1.0.0',
                 'codemeta:description':
                     'Tiny web service for parsing yarn.lock files',
                 'codemeta:issueTracker':
                     'https://github.com/librariesio/yarn-parser/issues',
                 'codemeta:name': 'yarn-parser',
                 'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'],
             },
             'indexer_configuration_id': 7
         }])]
         # then
         self.assertEqual(expected_results, results)