diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -472,12 +472,17 @@ {'@id': 'https://opensource.org/licenses/MIT'}] """ - licenses = d.get('licenses', {}).get('license', []) + licenses = d.get('licenses') + if not isinstance(licenses, dict): + return + licenses = licenses.get('license') if isinstance(licenses, dict): licenses = [licenses] + elif not isinstance(licenses, list): + return return [{"@id": license['url']} for license in licenses - if 'url' in license] or None + if isinstance(license, dict) and 'url' in license] or None _normalize_pkginfo_key = str.lower diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -707,6 +707,50 @@ 'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app', }) + raw_content = b""" + + Maven Default Project + 4.0.0 + com.mycompany.app + my-app + 1.2.3 + + + """ + result = self.maven_mapping.translate(raw_content) + self.assertEqual(result, { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'type': 'SoftwareSourceCode', + 'name': 'Maven Default Project', + 'identifier': 'com.mycompany.app', + 'version': '1.2.3', + 'codeRepository': + 'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app', + }) + + def test_compute_metadata_maven_invalid_licenses(self): + raw_content = b""" + + Maven Default Project + 4.0.0 + com.mycompany.app + my-app + 1.2.3 + + foo + + """ + result = self.maven_mapping.translate(raw_content) + self.assertEqual(result, { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'type': 'SoftwareSourceCode', + 'name': 'Maven Default Project', + 'identifier': 'com.mycompany.app', + 'version': '1.2.3', + 'codeRepository': + 'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app', + }) + def test_compute_metadata_maven_multiple(self): '''Tests when there are multiple code repos and licenses.''' raw_content = b"""