Page MenuHomeSoftware Heritage

D619.diff
No OneTemporary

D619.diff

diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -22,12 +22,16 @@
CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0'
CODEMETA_URI = 'https://codemeta.github.io/terms/'
+# Base URI of the schema.org vocabulary.
+SCHEMA_URI = 'http://schema.org/'
-# CodeMeta properties that we cannot properly represent.
+# Full property URIs, each built from CODEMETA_URI plus the term name.
+# NOTE(review): presumably these are filtered out of the crosswalk
+# table -- confirm against the code that reads this set.
PROPERTY_BLACKLIST = {
- 'https://codemeta.github.io/terms/softwareRequirements',
- 'https://codemeta.github.io/terms/softwareSuggestions',
+ # CodeMeta properties that we cannot properly represent.
+ CODEMETA_URI + 'softwareRequirements',
+ CODEMETA_URI + 'softwareSuggestions',
+
+ # Duplicate of 'author'
+ CODEMETA_URI + 'creator',
}
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -3,11 +3,12 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import re
import abc
import json
import logging
-from swh.indexer.codemeta import CROSSWALK_TABLE, compact
+from swh.indexer.codemeta import CROSSWALK_TABLE, CODEMETA_URI, compact
MAPPINGS = {}
@@ -145,12 +146,64 @@
mapping = CROSSWALK_TABLE['NodeJS']
filename = b'package.json'
+ _schema_shortcuts = {
+ 'github': 'https://github.com/',
+ 'gist': 'https://gist.github.com/',
+ 'bitbucket': 'https://bitbucket.org/',
+ 'gitlab': 'https://gitlab.com/',
+ }
+
def normalize_repository(self, d):
- return '{type}+{url}'.format(**d)
+        """Normalize npm's ``repository`` field to a single URL string.
+
+        Format reference:
+        https://docs.npmjs.com/files/package.json#repository
+
+        Args:
+            d: the raw field value; either a dict with ``type`` and ``url``
+                keys, or a string (a full URL, a ``<host>:<path>`` shortcut
+                for a host listed in ``_schema_shortcuts``, or a bare
+                ``<user>/<repo>`` path, which is resolved on GitHub).
+
+        Returns:
+            The normalized URL, or None when the value cannot be
+            interpreted (unsupported type, missing keys, empty string or
+            unknown shortcut prefix).
+        """
+        if isinstance(d, dict):
+            # package.json is arbitrary user input: a missing key must not
+            # escape the normalizer as a KeyError.
+            if 'type' not in d or 'url' not in d:
+                return None
+            return '{type}+{url}'.format(**d)
+        if not isinstance(d, str) or not d:
+            # Unsupported type, or an empty string that would otherwise be
+            # expanded to the bare GitHub prefix.
+            return None
+        if '://' in d:
+            # Already a full URL.
+            return d
+        if ':' in d:
+            (schema, rest) = d.split(':', 1)
+            prefix = self._schema_shortcuts.get(schema)
+            return prefix + rest if prefix is not None else None
+        # No scheme at all: implicit GitHub shortcut.
+        return self._schema_shortcuts['github'] + d
def normalize_bugs(self, d):
- return '{url}'.format(**d)
+        """Normalize npm's ``bugs`` field to the issue tracker URL.
+
+        Format reference: https://docs.npmjs.com/files/package.json#bugs
+
+        Args:
+            d: the raw field value; either a dict that may hold a ``url``
+                key, or a plain string which is itself the URL.
+
+        Returns:
+            The tracker URL, or None when no URL is available.
+        """
+        if isinstance(d, str):
+            # npm allows the whole field to be just the URL.
+            return d
+        if isinstance(d, dict) and 'url' in d:
+            return '{url}'.format(**d)
+        # Only an email was given, or the value has an unsupported type.
+        return None
+    # Splits npm's one-line person shorthand "Name <email> (url)"; the
+    # email and url parts are optional.
+    _parse_author = re.compile(r'^ *'
+                               r'(?P<name>.*?)'
+                               r'( +<(?P<email>.*)>)?'
+                               r'( +\((?P<url>.*)\))?'
+                               r' *$')
+
+    def normalize_author(self, d):
+        """Normalize npm's ``author`` field to a CodeMeta Person dict.
+
+        Format reference: https://docs.npmjs.com/files/package.json
+        #people-fields-author-contributors
+
+        Args:
+            d: the raw field value; either a dict that may hold ``name``,
+                ``email`` and ``url`` keys, or a string in the
+                ``Name <email> (url)`` shorthand.
+
+        Returns:
+            A dict with ``@type`` set to the CodeMeta ``Person`` URI plus
+            whichever of name/email/url are non-empty, or None when the
+            value has an unsupported type or cannot be parsed.
+        """
+        if isinstance(d, dict):
+            name = d.get('name', None)
+            email = d.get('email', None)
+            url = d.get('url', None)
+        elif isinstance(d, str):
+            match = self._parse_author.match(d)
+            if match is None:
+                # '.' does not cross newlines, so a multi-line string does
+                # not match; report it as unparseable instead of raising
+                # AttributeError.
+                return None
+            name = match.group('name')
+            email = match.group('email')
+            url = match.group('url')
+        else:
+            return None
+        author = {'@type': CODEMETA_URI+'Person'}
+        if name:
+            author[CODEMETA_URI+'name'] = name
+        if email:
+            author[CODEMETA_URI+'email'] = email
+        if url:
+            author[CODEMETA_URI+'url'] = url
+        return author
+
+
@register_mapping
class CodemetaMapping(JsonMapping):
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -89,7 +89,7 @@
'cpu': 'https://codemeta.github.io/terms/processorRequirements',
'engines':
'https://codemeta.github.io/terms/processorRequirements',
- 'author': 'https://codemeta.github.io/terms/creator',
+ 'author': 'https://codemeta.github.io/terms/author',
'author.email': 'https://codemeta.github.io/terms/email',
'author.name': 'https://codemeta.github.io/terms/name',
'contributor': 'https://codemeta.github.io/terms/contributor',
@@ -130,6 +130,10 @@
"repository": {
"type": "git",
"url": "https://github.com/moranegg/metadata_test"
+ },
+ "author": {
+ "email": "moranegg@example.com",
+ "name": "Morane G"
}
}
"""
@@ -140,6 +144,11 @@
'codemeta:description': 'Simple package.json test for indexer',
'codemeta:codeRepository':
'git+https://github.com/moranegg/metadata_test',
+ 'codemeta:author': {
+ 'type': 'codemeta:Person',
+ 'codemeta:name': 'Morane G',
+ 'codemeta:email': 'moranegg@example.com',
+ },
}
# when
@@ -224,8 +233,12 @@
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'codemeta:issueTracker':
'https://github.com/npm/npm/issues',
- 'codemeta:creator':
- 'Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)',
+ 'codemeta:author': {
+ 'type': 'codemeta:Person',
+ 'codemeta:name': 'Isaac Z. Schlueter',
+ 'codemeta:email': 'i@izs.me',
+ 'codemeta:url': 'http://blog.izs.me',
+ },
'codemeta:codeRepository':
'git+https://github.com/npm/npm',
'codemeta:description': 'a package manager for JavaScript',

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 10:27 PM (2 d, 14 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219643

Event Timeline