diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -15,6 +15,7 @@ import click import defusedxml.ElementTree +import elementpath.exceptions import xmlschema from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI @@ -423,7 +424,9 @@ def translate(self, content): try: tree = defusedxml.ElementTree.fromstring(content) - except defusedxml.ElementTree.ParseError: + res = self.schema.to_dict(tree, validation='lax') + except (defusedxml.ElementTree.ParseError, + elementpath.exceptions.ElementPathError): self.log.warning('Error parsing XML from %s', self.log_suffix) return None except UnicodeDecodeError: @@ -435,7 +438,6 @@ self.log_suffix) return None - res = self.schema.to_dict(tree, validation='lax') if res is None: d = {} else: diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -679,6 +679,24 @@ self.assertEqual(cm.output, [expected_warning]) self.assertEqual(result, None) + raw_content = b""" + + """ + with self.assertLogs('swh.indexer.metadata_dictionary', + level='WARNING') as cm: + result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) + self.assertEqual(cm.output, [expected_warning]) + self.assertEqual(result, None) + + raw_content = b""" + + """ + with self.assertLogs('swh.indexer.metadata_dictionary', + level='WARNING') as cm: + result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) + self.assertEqual(cm.output, [expected_warning]) + self.assertEqual(result, None) + def test_compute_metadata_maven_unknown_encoding(self): expected_warning = ( 'WARNING:swh.indexer.metadata_dictionary.MavenMapping:'