diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -9,6 +9,7 @@
import json
import logging
import email.parser
+import xml.parsers.expat
import xmltodict
@@ -258,7 +259,11 @@
mapping = CROSSWALK_TABLE['Java (Maven)']
def translate(self, content):
- d = xmltodict.parse(content).get('project')
+ try:
+ d = xmltodict.parse(content).get('project') or {}
+ except xml.parsers.expat.ExpatError:
+ self.log.warning('Error parsing XML of %r', content)
+ return None
metadata = self.translate_dict(d, normalize=False)
metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
metadata[SCHEMA_URI+'license'] = self.parse_licenses(d)
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -548,6 +548,16 @@
'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
})
+    def test_compute_metadata_maven_empty(self):
+        """A <project> element with no children yields only the context."""
+        # The root element must be present: whitespace-only input is not
+        # well-formed XML and would take the ExpatError path instead.
+        # An empty <project> is expected to parse to a None value, which is
+        # the case the `or {}` fallback in MavenMapping.translate covers.
+        raw_content = b"""
+        <project>
+        </project>"""
+        result = MAPPINGS["MavenMapping"].translate(raw_content)
+        self.assertEqual(result, {
+            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+            'type': 'SoftwareSourceCode',
+        })
+
def test_compute_metadata_maven_almost_empty(self):
raw_content = b"""
@@ -559,6 +569,16 @@
'type': 'SoftwareSourceCode',
})
+    def test_compute_metadata_maven_invalid_xml(self):
+        """Malformed XML makes MavenMapping.translate return None."""
+        # Case 1: the root element is opened but never closed.
+        raw_content = b"""
+        <project>"""
+        result = MAPPINGS["MavenMapping"].translate(raw_content)
+        self.assertIsNone(result)
+        # Case 2: whitespace only, i.e. no root element at all.
+        raw_content = b"""
+        """
+        result = MAPPINGS["MavenMapping"].translate(raw_content)
+        self.assertIsNone(result)
+
def test_compute_metadata_maven_minimal(self):
raw_content = b"""