diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -9,6 +9,7 @@ import json import logging import email.parser +import xml.parsers.expat import xmltodict @@ -258,7 +259,11 @@ mapping = CROSSWALK_TABLE['Java (Maven)'] def translate(self, content): - d = xmltodict.parse(content).get('project') + try: + d = xmltodict.parse(content).get('project') or {} + except xml.parsers.expat.ExpatError: + self.log.warning('Error parsing XML of %r', content) + return None metadata = self.translate_dict(d, normalize=False) metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d) metadata[SCHEMA_URI+'license'] = self.parse_licenses(d) diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -548,6 +548,16 @@ 'http://repo1.maven.org/maven2/com/mycompany/app/my-app', }) + def test_compute_metadata_maven_empty(self): + raw_content = b""" + + """ + result = MAPPINGS["MavenMapping"].translate(raw_content) + self.assertEqual(result, { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'type': 'SoftwareSourceCode', + }) + def test_compute_metadata_maven_almost_empty(self): raw_content = b""" @@ -559,6 +569,16 @@ 'type': 'SoftwareSourceCode', }) + def test_compute_metadata_maven_invalid_xml(self): + raw_content = b""" + """ + result = MAPPINGS["MavenMapping"].translate(raw_content) + self.assertEqual(result, None) + raw_content = b""" + """ + result = MAPPINGS["MavenMapping"].translate(raw_content) + self.assertEqual(result, None) + def test_compute_metadata_maven_minimal(self): raw_content = b"""