Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/base.py
Show First 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | |||||
class DictMapping(BaseMapping): | class DictMapping(BaseMapping): | ||||
"""Base class for mappings that take as input a file that is mostly | """Base class for mappings that take as input a file that is mostly | ||||
a key-value store (eg. a shallow JSON dict).""" | a key-value store (eg. a shallow JSON dict).""" | ||||
string_fields: List[str] = [] | string_fields: List[str] = [] | ||||
"""List of fields that are simple strings, and don't need any | """List of fields that are simple strings, and don't need any | ||||
normalization.""" | normalization.""" | ||||
date_fields: List[str] = [] | |||||
"""List of fields that are strings that should be typed as http://schema.org/Date | |||||
""" | |||||
uri_fields: List[str] = [] | uri_fields: List[str] = [] | ||||
"""List of fields that are simple URIs, and don't need any | """List of fields that are simple URIs, and don't need any | ||||
normalization.""" | normalization.""" | ||||
@property | @property | ||||
def mapping(self): | def mapping(self): | ||||
"""A translation dict to map dict keys into a canonical name.""" | """A translation dict to map dict keys into a canonical name.""" | ||||
raise NotImplementedError(f"{self.__class__.__name__}.mapping") | raise NotImplementedError(f"{self.__class__.__name__}.mapping") | ||||
@staticmethod | @staticmethod | ||||
def _normalize_method_name(name: str) -> str: | def _normalize_method_name(name: str) -> str: | ||||
return name.replace("-", "_") | return name.replace("-", "_") | ||||
@classmethod | @classmethod | ||||
def supported_terms(cls): | def supported_terms(cls): | ||||
# one-to-one mapping from the original key to a CodeMeta term | # one-to-one mapping from the original key to a CodeMeta term | ||||
simple_terms = { | simple_terms = { | ||||
str(term) | str(term) | ||||
for (key, term) in cls.mapping.items() | for (key, term) in cls.mapping.items() | ||||
if key in cls.string_fields + cls.uri_fields | if key in cls.string_fields + cls.date_fields + cls.uri_fields | ||||
or hasattr(cls, "normalize_" + cls._normalize_method_name(key)) | or hasattr(cls, "normalize_" + cls._normalize_method_name(key)) | ||||
} | } | ||||
# more complex mapping from the original key to JSON-LD | # more complex mapping from the original key to JSON-LD | ||||
complex_terms = { | complex_terms = { | ||||
str(term) | str(term) | ||||
for meth_name in dir(cls) | for meth_name in dir(cls) | ||||
if meth_name.startswith("translate_") | if meth_name.startswith("translate_") | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | def _translate_dict(self, content_dict: Dict) -> Dict[str, Any]: | ||||
graph.add((root, codemeta_key, item)) | graph.add((root, codemeta_key, item)) | ||||
else: | else: | ||||
graph.add((root, codemeta_key, v)) | graph.add((root, codemeta_key, v)) | ||||
elif k in self.string_fields and isinstance(v, str): | elif k in self.string_fields and isinstance(v, str): | ||||
graph.add((root, codemeta_key, rdflib.Literal(v))) | graph.add((root, codemeta_key, rdflib.Literal(v))) | ||||
elif k in self.string_fields and isinstance(v, list): | elif k in self.string_fields and isinstance(v, list): | ||||
for item in v: | for item in v: | ||||
graph.add((root, codemeta_key, rdflib.Literal(item))) | graph.add((root, codemeta_key, rdflib.Literal(item))) | ||||
elif k in self.date_fields and isinstance(v, str): | |||||
typed_v = rdflib.Literal(v, datatype=SCHEMA.Date) | |||||
graph.add((root, codemeta_key, typed_v)) | |||||
elif k in self.date_fields and isinstance(v, list): | |||||
for item in v: | |||||
if isinstance(item, str): | |||||
typed_item = rdflib.Literal(item, datatype=SCHEMA.Date) | |||||
graph.add((root, codemeta_key, typed_item)) | |||||
elif k in self.uri_fields and isinstance(v, str): | elif k in self.uri_fields and isinstance(v, str): | ||||
# Workaround for https://github.com/digitalbazaar/pyld/issues/91 : drop | # Workaround for https://github.com/digitalbazaar/pyld/issues/91 : drop | ||||
# URLs that are blatantly invalid early, so PyLD does not crash. | # URLs that are blatantly invalid early, so PyLD does not crash. | ||||
parsed_url = urllib.parse.urlparse(v) | parsed_url = urllib.parse.urlparse(v) | ||||
if parsed_url.netloc: | if parsed_url.netloc: | ||||
graph.add((root, codemeta_key, rdflib.URIRef(v))) | graph.add((root, codemeta_key, rdflib.URIRef(v))) | ||||
elif k in self.uri_fields and isinstance(v, list): | elif k in self.uri_fields and isinstance(v, list): | ||||
for item in v: | for item in v: | ||||
▲ Show 20 Lines • Show All 121 Lines • Show Last 20 Lines |