Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary.py
# Copyright (C) 2017 The Software Heritage developers | # Copyright (C) 2017 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | |||||
import abc | import abc | ||||
import json | import json | ||||
import logging | import logging | ||||
from swh.indexer.codemeta import CROSSWALK_TABLE, compact | from swh.indexer.codemeta import CROSSWALK_TABLE, CODEMETA_URI, compact | ||||
# Registry populated by ``register_mapping``: maps a mapping class's name to
# a single instance of that class.
MAPPINGS = {}


def register_mapping(cls):
    """Class decorator recording an instance of ``cls`` in ``MAPPINGS``.

    The class is instantiated with no arguments and the instance is stored
    under the class's ``__name__``; registering another class with the same
    name overwrites the earlier entry.

    Args:
        cls: the class to register; it must be instantiable without
            arguments.

    Returns:
        ``cls`` itself, unchanged, so the function can be used as a
        decorator.
    """
    instance = cls()
    MAPPINGS[cls.__name__] = instance
    return cls
▲ Show 20 Lines • Show All 121 Lines • ▼ Show 20 Lines | |||||
@register_mapping
class NpmMapping(JsonMapping):
    """
    dedicated class for NPM (package.json) mapping and translation
    """
    mapping = CROSSWALK_TABLE['NodeJS']
    filename = b'package.json'

    # Base URLs substituted for the ``<shortcut>:`` prefix of a shorthand
    # repository string (see normalize_repository).
    _schema_shortcuts = {
        'github': 'https://github.com/',
        'gist': 'https://gist.github.com/',
        'bitbucket': 'https://bitbucket.org/',
        'gitlab': 'https://gitlab.com/',
    }

    def normalize_repository(self, d):
        """Normalize the ``repository`` field of a package.json.

        See https://docs.npmjs.com/files/package.json#repository

        Args:
            d: either a dict with ``type`` and ``url`` keys, or a string
                (full URL, ``<shortcut>:<path>`` shorthand, or a bare path
                which is expanded with the ``github`` prefix).

        Returns:
            A string describing the repository, or None when the value
            cannot be interpreted (unsupported type, dict without ``url``,
            unknown shortcut, empty string).
        """
        if isinstance(d, dict):
            # A repository object without a URL carries no usable location.
            if 'url' not in d:
                return None
            if 'type' in d:
                return '{}+{}'.format(d['type'], d['url'])
            # No VCS type given: fall back to the URL alone rather than
            # failing on the missing key.
            return '{}'.format(d['url'])
        elif isinstance(d, str):
            if not d.strip():
                # An empty value would otherwise expand to the bare
                # GitHub base URL, which identifies no repository.
                return None
            if '://' in d:
                return d
            elif ':' in d:
                (schema, rest) = d.split(':', 1)
                if schema in self._schema_shortcuts:
                    return self._schema_shortcuts[schema] + rest
                else:
                    return None
            else:
                return self._schema_shortcuts['github'] + d
        else:
            return None

    def normalize_bugs(self, d):
        """Normalize the ``bugs`` field of a package.json.

        See https://docs.npmjs.com/files/package.json#bugs

        Args:
            d: either a dict (which may hold ``url`` and/or ``email``) or a
                plain string holding the issue tracker URL.

        Returns:
            The issue tracker URL as a string, or None when no URL is
            available.
        """
        if isinstance(d, str):
            # npm allows the short form ``"bugs": "<url>"``.
            return d
        if isinstance(d, dict) and 'url' in d:
            return '{}'.format(d['url'])
        return None

    _parse_author = re.compile(r'^ *'
                               r'(?P<name>.*?)'
                               r'( +<(?P<email>.*)>)?'
                               r'( +\((?P<url>.*)\))?'
                               r' *$')

    def normalize_author(self, d):
        """Normalize a "people" field (author/contributor) of a package.json.

        See https://docs.npmjs.com/files/package.json
        #people-fields-author-contributors

        Args:
            d: either a dict with optional ``name``, ``email`` and ``url``
                keys, or a string of the form ``Name <email> (url)`` where
                the email and url parts are optional.

        Returns:
            A dict typed as a CodeMeta ``Person`` holding whichever of
            name, email and url were found, or None when ``d`` is neither a
            dict nor a parsable string.
        """
        author = {'@type': CODEMETA_URI+'Person'}
        if isinstance(d, dict):
            name = d.get('name')
            email = d.get('email')
            url = d.get('url')
        elif isinstance(d, str):
            match = self._parse_author.match(d)
            if match is None:
                # The pattern does not span newlines, so a multi-line
                # string yields no match; treat it as unparsable.
                return None
            name = match.group('name')
            email = match.group('email')
            url = match.group('url')
        else:
            return None
        if name:
            author[CODEMETA_URI+'name'] = name
        if email:
            author[CODEMETA_URI+'email'] = email
        if url:
            author[CODEMETA_URI+'url'] = url
        return author
@register_mapping | @register_mapping | ||||
class CodemetaMapping(JsonMapping): | class CodemetaMapping(JsonMapping): | ||||
""" | """ | ||||
dedicated class for CodeMeta (codemeta.json) mapping and translation | dedicated class for CodeMeta (codemeta.json) mapping and translation | ||||
""" | """ | ||||
mapping = CROSSWALK_TABLE['codemeta-V1'] | mapping = CROSSWALK_TABLE['codemeta-V1'] | ||||
filename = b'codemeta.json' | filename = b'codemeta.json' | ||||
Show All 16 Lines |