diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -22,14 +22,32 @@
     mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
     string_fields = ["keywords", "license", "abstract", "version", "doi"]
 
-    def translate(self, raw_content: bytes) -> Dict[str, str]:
+    def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
+        """Translate a raw Citation File Format (CFF) document to CodeMeta.
+
+        Args:
+            raw_content: raw bytes of the YAML document to translate.
+
+        Returns:
+            The translated metadata with its ``@context`` set, or ``None``
+            when the content is not valid YAML or is not a YAML mapping.
+        """
         raw_content_string: str = raw_content.decode()
-        content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
-        metadata = self._translate_dict(content_dict)
-
-        metadata["@context"] = CODEMETA_CONTEXT_URL
-
-        return metadata
+        try:
+            content_dict = yaml.load(raw_content_string, Loader=yaml.SafeLoader)
+        except yaml.YAMLError:
+            # YAMLError is the base of every PyYAML parsing failure (scanner,
+            # parser, constructor, ...), not only scanner errors.
+            self.log.warning("Error yaml is invalid and will be skipped")
+            return None
+
+        if isinstance(content_dict, dict):
+            metadata = self._translate_dict(content_dict)
+            metadata["@context"] = CODEMETA_CONTEXT_URL
+            return metadata
+
+        # Valid YAML whose top level is not a mapping (scalar, list, empty).
+        return None
 
     def normalize_authors(self, d: List[dict]) -> Dict[str, list]:
         result = []