diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -22,14 +22,26 @@
     mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
     string_fields = ["keywords", "license", "abstract", "version", "doi"]
 
-    def translate(self, raw_content: bytes) -> Dict[str, str]:
+    def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
+        """Translate raw CFF (YAML) content into a metadata dict.
+
+        Returns the translated dict with "@context" set to
+        CODEMETA_CONTEXT_URL, or None when the content is not valid YAML
+        or its top-level value is not a mapping (e.g. empty file, list).
+        """
         raw_content_string: str = raw_content.decode()
-        content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
-        metadata = self._translate_dict(content_dict)
+        try:
+            content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
+        except yaml.YAMLError:
+            # base class of ScannerError, ParserError, ConstructorError, ...
+            return None
 
-        metadata["@context"] = CODEMETA_CONTEXT_URL
+        if isinstance(content_dict, dict):
+            metadata = self._translate_dict(content_dict)
+            metadata["@context"] = CODEMETA_CONTEXT_URL
+            return metadata
 
-        return metadata
+        return None
 
     def normalize_authors(self, d: List[dict]) -> Dict[str, list]:
         result = []
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -159,6 +159,72 @@
         # then
         assert expected == result
 
+    def test_compute_metadata_cff_invalid_yaml(self):
+        """
+        test yaml translation for invalid yaml file
+        """
+        # given
+        content = """cff-version: 1.0.3
+message: To cite the SigMF specification, please include the following:
+authors:
+  - name: The GNU Radio Foundation, Inc.
+        """.encode(
+            "utf-8"
+        )
+
+        expected = None
+
+        result = self.cff_mapping.translate(content)
+        # then
+        assert expected == result
+
+    def test_compute_metadata_cff_unparsable_yaml(self):
+        """
+        test yaml translation for yaml rejected by the parser (not the scanner)
+        """
+        # given
+        content = b"authors: [unclosed"
+
+        expected = None
+
+        result = self.cff_mapping.translate(content)
+        # then
+        assert expected == result
+
+    def test_compute_metadata_cff_empty(self):
+        """
+        test yaml translation for empty yaml file
+        """
+        # given
+        content = """
+        """.encode(
+            "utf-8"
+        )
+
+        expected = None
+
+        result = self.cff_mapping.translate(content)
+        # then
+        assert expected == result
+
+    def test_compute_metadata_cff_list(self):
+        """
+        test yaml translation for yaml file whose top-level value is a list
+        """
+        # given
+        content = """
+- Foo
+- Bar
+        """.encode(
+            "utf-8"
+        )
+
+        expected = None
+
+        result = self.cff_mapping.translate(content)
+        # then
+        assert expected == result
+
     def test_compute_metadata_npm(self):
         """
         testing only computation of metadata with hard_mapping_npm