diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py --- a/swh/indexer/codemeta.py +++ b/swh/indexer/codemeta.py @@ -22,12 +22,16 @@ CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0' CODEMETA_URI = 'https://codemeta.github.io/terms/' +SCHEMA_URI = 'http://schema.org/' -# CodeMeta properties that we cannot properly represent. PROPERTY_BLACKLIST = { - 'https://codemeta.github.io/terms/softwareRequirements', - 'https://codemeta.github.io/terms/softwareSuggestions', + # CodeMeta properties that we cannot properly represent. + CODEMETA_URI + 'softwareRequirements', + CODEMETA_URI + 'softwareSuggestions', + + # Duplicate of 'author' + CODEMETA_URI + 'creator', } diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -3,11 +3,12 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import re import abc import json import logging -from swh.indexer.codemeta import CROSSWALK_TABLE, compact +from swh.indexer.codemeta import CROSSWALK_TABLE, CODEMETA_URI, compact MAPPINGS = {} @@ -145,12 +146,64 @@ mapping = CROSSWALK_TABLE['NodeJS'] filename = b'package.json' + _schema_shortcuts = { + 'github': 'https://github.com/', + 'gist': 'https://gist.github.com/', + 'bitbucket': 'https://bitbucket.org/', + 'gitlab': 'https://gitlab.com/', + } + def normalize_repository(self, d): - return '{type}+{url}'.format(**d) + """https://docs.npmjs.com/files/package.json#repository""" + if isinstance(d, dict): + return '{type}+{url}'.format(**d) + elif isinstance(d, str): + if '://' in d: + return d + elif ':' in d: + (schema, rest) = d.split(':', 1) + if schema in self._schema_shortcuts: + return self._schema_shortcuts[schema] + rest + else: + return None + else: + return self._schema_shortcuts['github'] + d + + else: + return None def normalize_bugs(self, d): return 
'{url}'.format(**d) + _parse_author = re.compile(r'^ *' + r'(?P<name>.*?)' + r'( +<(?P<email>.*)>)?' + r'( +\((?P<url>.*)\))?' + r' *$') + + def normalize_author(self, d): + 'https://docs.npmjs.com/files/package.json' \ + '#people-fields-author-contributors' + author = {'@type': CODEMETA_URI+'Person'} + if isinstance(d, dict): + name = d.get('name', None) + email = d.get('email', None) + url = d.get('url', None) + elif isinstance(d, str): + match = self._parse_author.match(d) + name = match.group('name') + email = match.group('email') + url = match.group('url') + else: + return None + if name: + author[CODEMETA_URI+'name'] = name + if email: + author[CODEMETA_URI+'email'] = email + if url: + author[CODEMETA_URI+'url'] = url + return author + @register_mapping class CodemetaMapping(JsonMapping): diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -89,7 +89,7 @@ 'cpu': 'https://codemeta.github.io/terms/processorRequirements', 'engines': 'https://codemeta.github.io/terms/processorRequirements', - 'author': 'https://codemeta.github.io/terms/creator', + 'author': 'https://codemeta.github.io/terms/author', 'author.email': 'https://codemeta.github.io/terms/email', 'author.name': 'https://codemeta.github.io/terms/name', 'contributor': 'https://codemeta.github.io/terms/contributor', @@ -130,6 +130,10 @@ "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" + }, + "author": { + "email": "moranegg@example.com", + "name": "Morane G" } } """ @@ -140,6 +144,11 @@ 'codemeta:description': 'Simple package.json test for indexer', 'codemeta:codeRepository': 'git+https://github.com/moranegg/metadata_test', + 'codemeta:author': { + 'type': 'codemeta:Person', + 'codemeta:name': 'Morane G', + 'codemeta:email': 'moranegg@example.com', + }, } # when @@ -224,8 +233,12 @@ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', 'codemeta:issueTracker': 
'https://github.com/npm/npm/issues', - 'codemeta:creator': - 'Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)', + 'codemeta:author': { + 'type': 'codemeta:Person', + 'codemeta:name': 'Isaac Z. Schlueter', + 'codemeta:email': 'i@izs.me', + 'codemeta:url': 'http://blog.izs.me', + }, 'codemeta:codeRepository': 'git+https://github.com/npm/npm', 'codemeta:description': 'a package manager for JavaScript',