Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/base.py
Show First 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | def __init__(self, log_suffix=""): | ||||
) | ) | ||||
@property | @property | ||||
def name(self): | def name(self): | ||||
"""A name of this mapping, used as an identifier in the | """A name of this mapping, used as an identifier in the | ||||
indexer storage.""" | indexer storage.""" | ||||
raise NotImplementedError(f"{self.__class__.__name__}.name") | raise NotImplementedError(f"{self.__class__.__name__}.name") | ||||
def translate(self, file_content: bytes) -> Optional[Dict]: | def translate(self, raw_content: bytes) -> Optional[Dict]: | ||||
"""Translates metadata, from the content of a file or of a RawExtrinsicMetadata | """ | ||||
object.""" | Translates content by parsing content from a bytestring containing | ||||
mapping-specific data and translating with the appropriate mapping | |||||
to JSON-LD using the Codemeta and ForgeFed vocabularies. | |||||
Args: | |||||
raw_content: raw content to translate | |||||
Returns: | |||||
translated metadata in JSON friendly form needed for the content | |||||
if parseable, :const:`None` otherwise. | |||||
""" | |||||
raise NotImplementedError(f"{self.__class__.__name__}.translate") | raise NotImplementedError(f"{self.__class__.__name__}.translate") | ||||
def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]: | def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]: | ||||
raise NotImplementedError(f"{self.__class__.__name__}.normalize_translation") | raise NotImplementedError(f"{self.__class__.__name__}.normalize_translation") | ||||
class BaseExtrinsicMapping(BaseMapping): | class BaseExtrinsicMapping(BaseMapping): | ||||
"""Base class for extrinsic_metadata mappings to inherit from | """Base class for extrinsic_metadata mappings to inherit from | ||||
▲ Show 20 Lines • Show All 195 Lines • ▼ Show 20 Lines | ): | ||||
""" | """ | ||||
pass | pass | ||||
class JsonMapping(DictMapping): | class JsonMapping(DictMapping): | ||||
"""Base class for all mappings that use JSON data as input.""" | """Base class for all mappings that use JSON data as input.""" | ||||
def translate(self, raw_content: bytes) -> Optional[Dict]: | def translate(self, raw_content: bytes) -> Optional[Dict]: | ||||
""" | |||||
Translates content by parsing content from a bytestring containing | |||||
json data and translating with the appropriate mapping | |||||
Args: | |||||
raw_content (bytes): raw content to translate | |||||
Returns: | |||||
dict: translated metadata in json-friendly form needed for | |||||
the indexer | |||||
""" | |||||
try: | try: | ||||
raw_content_string: str = raw_content.decode() | raw_content_string: str = raw_content.decode() | ||||
except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
self.log.warning("Error unidecoding from %s", self.log_suffix) | self.log.warning("Error unidecoding from %s", self.log_suffix) | ||||
return None | return None | ||||
try: | try: | ||||
content_dict = json.loads(raw_content_string) | content_dict = json.loads(raw_content_string) | ||||
except json.JSONDecodeError: | except json.JSONDecodeError: | ||||
self.log.warning("Error unjsoning from %s", self.log_suffix) | self.log.warning("Error unjsoning from %s", self.log_suffix) | ||||
return None | return None | ||||
if isinstance(content_dict, dict): | if isinstance(content_dict, dict): | ||||
return self._translate_dict(content_dict) | return self._translate_dict(content_dict) | ||||
return None | return None | ||||
class XmlMapping(DictMapping): | class XmlMapping(DictMapping): | ||||
"""Base class for all mappings that use XML data as input.""" | """Base class for all mappings that use XML data as input.""" | ||||
def translate(self, raw_content: bytes) -> Optional[Dict]: | def translate(self, raw_content: bytes) -> Optional[Dict]: | ||||
""" | |||||
Translates content by parsing content from a bytestring containing | |||||
XML data and translating with the appropriate mapping | |||||
Args: | |||||
raw_content (bytes): raw content to translate | |||||
Returns: | |||||
dict: translated metadata in json-friendly form needed for | |||||
the indexer | |||||
""" | |||||
try: | try: | ||||
d = xmltodict.parse(raw_content) | d = xmltodict.parse(raw_content) | ||||
except xml.parsers.expat.ExpatError: | except xml.parsers.expat.ExpatError: | ||||
self.log.warning("Error parsing XML from %s", self.log_suffix) | self.log.warning("Error parsing XML from %s", self.log_suffix) | ||||
return None | return None | ||||
except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
self.log.warning("Error unidecoding XML from %s", self.log_suffix) | self.log.warning("Error unidecoding XML from %s", self.log_suffix) | ||||
return None | return None | ||||
Show All 31 Lines |