diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -15,6 +15,7 @@
import click
import defusedxml.ElementTree
+import elementpath.exceptions
import xmlschema
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
@@ -423,7 +424,9 @@
def translate(self, content):
try:
tree = defusedxml.ElementTree.fromstring(content)
- except defusedxml.ElementTree.ParseError:
+ res = self.schema.to_dict(tree, validation='lax')
+ except (defusedxml.ElementTree.ParseError,
+ elementpath.exceptions.ElementPathError):
self.log.warning('Error parsing XML from %s', self.log_suffix)
return None
except UnicodeDecodeError:
@@ -435,7 +438,6 @@
self.log_suffix)
return None
- res = self.schema.to_dict(tree, validation='lax')
if res is None:
d = {}
else:
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -679,6 +679,24 @@
self.assertEqual(cm.output, [expected_warning])
self.assertEqual(result, None)
+ raw_content = b"""
+
+ """
+ with self.assertLogs('swh.indexer.metadata_dictionary',
+ level='WARNING') as cm:
+ result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
+ self.assertEqual(cm.output, [expected_warning])
+ self.assertEqual(result, None)
+
+ raw_content = b"""
+
+ """
+ with self.assertLogs('swh.indexer.metadata_dictionary',
+ level='WARNING') as cm:
+ result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
+ self.assertEqual(cm.output, [expected_warning])
+ self.assertEqual(result, None)
+
def test_compute_metadata_maven_unknown_encoding(self):
expected_warning = (
'WARNING:swh.indexer.metadata_dictionary.MavenMapping:'