diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -258,7 +258,7 @@
mapping = CROSSWALK_TABLE['Java (Maven)']
def translate(self, content):
- d = xmltodict.parse(content)['project']
+ d = xmltodict.parse(content).get('project') or {}  # may be None
metadata = self.translate_dict(d, normalize=False)
metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
metadata[SCHEMA_URI+'license'] = self.parse_licenses(d)
@@ -269,17 +269,14 @@
def parse_repositories(self, d):
"""https://maven.apache.org/pom.html#Repositories"""
if 'repositories' not in d:
- return [self.parse_repository(d, self._default_repository)]
+ results = [self.parse_repository(d, self._default_repository)]
else:
- repositories = d['repositories'].get('repository', [])
+ repositories = (d['repositories'] or {}).get('repository') or []
if not isinstance(repositories, list):
repositories = [repositories]
- results = []
- for repo in repositories:
- res = self.parse_repository(d, repo)
- if res:
- results.append(res)
- return results
+ results = [self.parse_repository(d, repo)
+ for repo in repositories]
+ return [res for res in results if res] or None
def parse_repository(self, d, repo):
if repo.get('layout', 'default') != 'default':
@@ -287,12 +284,10 @@
url = repo.get('url')
group_id = d.get('groupId')
artifact_id = d.get('artifactId')
- if isinstance(url, str):
- if isinstance(group_id, str):
- url = os.path.join(url, *group_id.split('.'))
- if isinstance(artifact_id, str):
- url = os.path.join(url, artifact_id)
- return {"@id": url}
+ if (isinstance(url, str) and isinstance(group_id, str)
+ and isinstance(artifact_id, str)):
+ repo = os.path.join(url, *group_id.split('.'), artifact_id)
+ return {"@id": repo}
def normalize_groupId(self, id_):
return {"@id": id_}
@@ -353,7 +348,7 @@
licenses = [licenses]
return [{"@id": license['url']}
for license in licenses
- if 'url' in license]
+ if 'url' in license] or None
_normalize_pkginfo_key = str.lower
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -548,6 +548,17 @@
'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
})
+ def test_compute_metadata_maven_almost_empty(self):
+ raw_content = b"""
+
+
+ """
+ result = MAPPINGS["MavenMapping"].translate(raw_content)
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ })
+
def test_compute_metadata_maven_minimal(self):
raw_content = b"""
@@ -566,7 +577,6 @@
'version': '1.2.3',
'codeRepository':
'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
- 'license': [],
})
def test_compute_metadata_maven_multiple(self):