Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary.py
Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines | class DictMapping(BaseMapping): | ||||
a key-value store (eg. a shallow JSON dict).""" | a key-value store (eg. a shallow JSON dict).""" | ||||
@property | @property | ||||
@abc.abstractmethod | @abc.abstractmethod | ||||
def mapping(self): | def mapping(self): | ||||
"""A translation dict to map dict keys into a canonical name.""" | """A translation dict to map dict keys into a canonical name.""" | ||||
pass | pass | ||||
def translate_dict(self, content_dict, *, normalize=True): | def _translate_dict(self, content_dict, *, normalize=True): | ||||
""" | """ | ||||
Translates content by parsing content from a dict object | Translates content by parsing content from a dict object | ||||
and translating with the appropriate mapping | and translating with the appropriate mapping | ||||
Args: | Args: | ||||
content_dict (dict): content dict to translate | content_dict (dict): content dict to translate | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | def translate(self, raw_content): | ||||
self.log.warning('Error unidecoding from %s', self.log_suffix) | self.log.warning('Error unidecoding from %s', self.log_suffix) | ||||
return | return | ||||
try: | try: | ||||
content_dict = json.loads(raw_content) | content_dict = json.loads(raw_content) | ||||
except json.JSONDecodeError: | except json.JSONDecodeError: | ||||
self.log.warning('Error unjsoning from %s', self.log_suffix) | self.log.warning('Error unjsoning from %s', self.log_suffix) | ||||
return | return | ||||
if isinstance(content_dict, dict): | if isinstance(content_dict, dict): | ||||
return self.translate_dict(content_dict) | return self._translate_dict(content_dict) | ||||
@register_mapping | @register_mapping | ||||
class NpmMapping(JsonMapping): | class NpmMapping(JsonMapping): | ||||
""" | """ | ||||
dedicated class for NPM (package.json) mapping and translation | dedicated class for NPM (package.json) mapping and translation | ||||
""" | """ | ||||
name = 'npm' | name = 'npm' | ||||
▲ Show 20 Lines • Show All 160 Lines • ▼ Show 20 Lines | def translate(self, content): | ||||
except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
self.log.warning('Error unidecoding XML from %s', self.log_suffix) | self.log.warning('Error unidecoding XML from %s', self.log_suffix) | ||||
return None | return None | ||||
except (LookupError, ValueError): | except (LookupError, ValueError): | ||||
# unknown encoding or multi-byte encoding | # unknown encoding or multi-byte encoding | ||||
self.log.warning('Error detecting XML encoding from %s', | self.log.warning('Error detecting XML encoding from %s', | ||||
self.log_suffix) | self.log_suffix) | ||||
return None | return None | ||||
metadata = self.translate_dict(d, normalize=False) | metadata = self._translate_dict(d, normalize=False) | ||||
metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d) | metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d) | ||||
metadata[SCHEMA_URI+'license'] = self.parse_licenses(d) | metadata[SCHEMA_URI+'license'] = self.parse_licenses(d) | ||||
return self.normalize_translation(metadata) | return self.normalize_translation(metadata) | ||||
_default_repository = {'url': 'https://repo.maven.apache.org/maven2/'} | _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'} | ||||
def parse_repositories(self, d): | def parse_repositories(self, d): | ||||
"""https://maven.apache.org/pom.html#Repositories | """https://maven.apache.org/pom.html#Repositories | ||||
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines | class PythonPkginfoMapping(DictMapping, SingleFileMapping): | ||||
def translate(self, content): | def translate(self, content): | ||||
msg = self._parser.parsebytes(content) | msg = self._parser.parsebytes(content) | ||||
d = {} | d = {} | ||||
for (key, value) in msg.items(): | for (key, value) in msg.items(): | ||||
key = _normalize_pkginfo_key(key) | key = _normalize_pkginfo_key(key) | ||||
if value != 'UNKNOWN': | if value != 'UNKNOWN': | ||||
d.setdefault(key, []).append(value) | d.setdefault(key, []).append(value) | ||||
metadata = self.translate_dict(d, normalize=False) | metadata = self._translate_dict(d, normalize=False) | ||||
if SCHEMA_URI+'author' in metadata or SCHEMA_URI+'email' in metadata: | if SCHEMA_URI+'author' in metadata or SCHEMA_URI+'email' in metadata: | ||||
metadata[SCHEMA_URI+'author'] = { | metadata[SCHEMA_URI+'author'] = { | ||||
'@list': [{ | '@list': [{ | ||||
'@type': SCHEMA_URI+'Person', | '@type': SCHEMA_URI+'Person', | ||||
SCHEMA_URI+'name': | SCHEMA_URI+'name': | ||||
metadata.pop(SCHEMA_URI+'author', [None])[0], | metadata.pop(SCHEMA_URI+'author', [None])[0], | ||||
SCHEMA_URI+'email': | SCHEMA_URI+'email': | ||||
metadata.pop(SCHEMA_URI+'email', [None])[0], | metadata.pop(SCHEMA_URI+'email', [None])[0], | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | def translate(self, raw_content): | ||||
content_dict = {} | content_dict = {} | ||||
for line in lines: | for line in lines: | ||||
match = self._re_spec_entry.match(line) | match = self._re_spec_entry.match(line) | ||||
if match: | if match: | ||||
value = self.eval_ruby_expression(match.group('expr')) | value = self.eval_ruby_expression(match.group('expr')) | ||||
if value: | if value: | ||||
content_dict[match.group('key')] = value | content_dict[match.group('key')] = value | ||||
return self.translate_dict(content_dict) | return self._translate_dict(content_dict) | ||||
def eval_ruby_expression(self, expr): | def eval_ruby_expression(self, expr): | ||||
"""Very simple evaluator of Ruby expressions. | """Very simple evaluator of Ruby expressions. | ||||
>>> GemspecMapping().eval_ruby_expression('"Foo bar"') | >>> GemspecMapping().eval_ruby_expression('"Foo bar"') | ||||
'Foo bar' | 'Foo bar' | ||||
>>> GemspecMapping().eval_ruby_expression("'Foo bar'") | >>> GemspecMapping().eval_ruby_expression("'Foo bar'") | ||||
'Foo bar' | 'Foo bar' | ||||
▲ Show 20 Lines • Show All 54 Lines • Show Last 20 Lines |