Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary.py
Show All 33 Lines | def __init__(self): | ||||
self.log = logging.getLogger('%s.%s' % ( | self.log = logging.getLogger('%s.%s' % ( | ||||
self.__class__.__module__, | self.__class__.__module__, | ||||
self.__class__.__name__)) | self.__class__.__name__)) | ||||
@abc.abstractmethod | @abc.abstractmethod | ||||
def detect_metadata_files(self, files): | def detect_metadata_files(self, files): | ||||
""" | """ | ||||
Detects files potentially containing metadata | Detects files potentially containing metadata | ||||
Args: | Args: | ||||
- file_entries (list): list of files | file_entries (list): list of files | ||||
Returns: | Returns: | ||||
- empty list if nothing was found | list: list of sha1 (possibly empty) | ||||
- list of sha1 otherwise | |||||
""" | """ | ||||
pass | pass | ||||
@abc.abstractmethod | @abc.abstractmethod | ||||
def translate(self, file_content): | def translate(self, file_content): | ||||
pass | pass | ||||
def normalize_translation(self, metadata): | def normalize_translation(self, metadata): | ||||
Show All 27 Lines | def mapping(self): | ||||
pass | pass | ||||
def translate_dict(self, content_dict, *, normalize=True): | def translate_dict(self, content_dict, *, normalize=True): | ||||
""" | """ | ||||
Translates content by parsing content from a dict object | Translates content by parsing content from a dict object | ||||
and translating with the appropriate mapping | and translating with the appropriate mapping | ||||
Args: | Args: | ||||
content_dict (dict) | content_dict (dict): content dict to translate | ||||
Returns: | Returns: | ||||
dict: translated metadata in json-friendly form needed for | dict: translated metadata in json-friendly form needed for | ||||
the indexer | the indexer | ||||
""" | """ | ||||
translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'} | translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'} | ||||
for k, v in content_dict.items(): | for k, v in content_dict.items(): | ||||
# First, check if there is a specific translation | # First, check if there is a specific translation | ||||
# method for this key | # method for this key | ||||
translation_method = getattr(self, 'translate_' + k, None) | translation_method = getattr(self, 'translate_' + k, None) | ||||
if translation_method: | if translation_method: | ||||
Show All 19 Lines | class JsonMapping(DictMapping, SingleFileMapping): | ||||
"""Base class for all mappings that use a JSON file as input.""" | """Base class for all mappings that use a JSON file as input.""" | ||||
def translate(self, raw_content): | def translate(self, raw_content): | ||||
""" | """ | ||||
Translates content by parsing content from a bytestring containing | Translates content by parsing content from a bytestring containing | ||||
json data and translating with the appropriate mapping | json data and translating with the appropriate mapping | ||||
Args: | Args: | ||||
raw_content: bytes | raw_content (bytes): raw content to translate | ||||
Returns: | Returns: | ||||
dict: translated metadata in json-friendly form needed for | dict: translated metadata in json-friendly form needed for | ||||
the indexer | the indexer | ||||
""" | """ | ||||
try: | try: | ||||
raw_content = raw_content.decode() | raw_content = raw_content.decode() | ||||
except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
self.log.warning('Error unidecoding %r', raw_content) | self.log.warning('Error unidecoding %r', raw_content) | ||||
return | return | ||||
try: | try: | ||||
▲ Show 20 Lines • Show All 141 Lines • Show Last 20 Lines |