diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py --- a/swh/indexer/metadata_dictionary/cff.py +++ b/swh/indexer/metadata_dictionary/cff.py @@ -1,4 +1,3 @@ -import re from typing import Dict, List, Optional, Union import yaml @@ -31,77 +30,28 @@ return None if isinstance(content_dict, dict): - content_dict_checked = self.value_type_check(content_dict) - metadata = self._translate_dict(content_dict_checked) + metadata = self._translate_dict(content_dict) metadata["@context"] = CODEMETA_CONTEXT_URL return metadata return None - def value_type_check(self, content_dict): - type_checked_content_dict = {} - - for field in content_dict: - if field == "authors": - authors = [] - - for author in content_dict["authors"]: - author_checked = {} - - if "given-names" in author and isinstance( - author["given-names"], str - ): - author_checked["given-names"] = author["given-names"] - - if "name-particle" in author and isinstance( - author["name-particle"], str - ): - author_checked["name-particle"] = author["name-particle"] - - # will add the rest if what I'm doing is correct... - - # Check if the given author email keeps with the re of emails - if "email" in author and bool( - re.match( - r"([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,})+", - author["email"], - ) - ): - author_checked["email"] = author["email"] - - for subField in author: - if not ( - subField == "given-names" - or subField == "name-particle" - or subField == "email" - ): - author_checked[subField] = author[subField] - - authors.append(author_checked) - - type_checked_content_dict["authors"] = authors - - else: - type_checked_content_dict[field] = content_dict[field] - - return type_checked_content_dict - def normalize_authors(self, d: List[dict]) -> Dict[str, list]: result = [] for author in d: author_data: Dict[str, Optional[Union[str, Dict]]] = { "@type": SCHEMA_URI + "Person" } - if "orcid" in author: + if "orcid" in author and isinstance(author["orcid"], str): author_data["@id"] = author["orcid"] - if "affiliation" in author: + if "affiliation" in author and isinstance(author["affiliation"], str): author_data[SCHEMA_URI + "affiliation"] = { "@type": SCHEMA_URI + "Organization", SCHEMA_URI + "name": author["affiliation"], } - if "family-names" in author: + if "family-names" in author and isinstance(author["family-names"], str): author_data[SCHEMA_URI + "familyName"] = author["family-names"] - if "given-names" in author: + if "given-names" in author and isinstance(author["given-names"], str): author_data[SCHEMA_URI + "givenName"] = author["given-names"] result.append(author_data)