diff --git a/swh/indexer/metadata_dictionary/maven.py b/swh/indexer/metadata_dictionary/maven.py --- a/swh/indexer/metadata_dictionary/maven.py +++ b/swh/indexer/metadata_dictionary/maven.py @@ -75,6 +75,9 @@ and isinstance(artifact_id, str) ): repo = os.path.join(url, *group_id.split("."), artifact_id) + if "${" in repo: + # Often used as templating in pom.xml files collected from VCSs + return graph.add((root, SCHEMA.codeRepository, URIRef(repo))) def normalize_groupId(self, id_): diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py --- a/swh/indexer/metadata_dictionary/npm.py +++ b/swh/indexer/metadata_dictionary/npm.py @@ -270,6 +270,12 @@ rdflib.term.URIRef('https://spdx.org/licenses/MIT') """ if isinstance(s, str): + if s.startswith("SEE LICENSE IN "): + # Very common pattern, because it is an example in the specification. + # It is followed by the filename; and the indexer architecture currently + # does not allow accessing that from metadata mappings. 
+ # (Plus, a hypothetical license mapping would eventually pick it up) + return return SPDX + s def normalize_keywords(self, lst): diff --git a/swh/indexer/tests/metadata_dictionary/test_maven.py b/swh/indexer/tests/metadata_dictionary/test_maven.py --- a/swh/indexer/tests/metadata_dictionary/test_maven.py +++ b/swh/indexer/tests/metadata_dictionary/test_maven.py @@ -353,6 +353,47 @@ } +def test_compute_metadata_maven_invalid_repository(): + raw_content = b""" + + Maven Default Project + 4.0.0 + com.mycompany.app + my-app + 1.2.3 + + + tcc-transaction-internal-releases + internal repository for released artifacts + ${repo.internal.releases.url} + + false + + + true + + + + + + Apache License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + """ + result = MAPPINGS["MavenMapping"]().translate(raw_content) + assert result == { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "name": "Maven Default Project", + "schema:identifier": "com.mycompany.app", + "version": "1.2.3", + "license": "https://www.apache.org/licenses/LICENSE-2.0.txt", + } + + @settings(suppress_health_check=[HealthCheck.too_slow]) @given( xml_document_strategy( diff --git a/swh/indexer/tests/metadata_dictionary/test_npm.py b/swh/indexer/tests/metadata_dictionary/test_npm.py --- a/swh/indexer/tests/metadata_dictionary/test_npm.py +++ b/swh/indexer/tests/metadata_dictionary/test_npm.py @@ -361,6 +361,24 @@ } +def test_npm_invalid_licenses(): + package_json = rb"""{ + "version": "1.0.0", + "license": "SEE LICENSE IN LICENSE.md", + "author": { + "name": "foo", + "url": "http://example.org" + } +}""" + result = MAPPINGS["NpmMapping"]().translate(package_json) + assert result == { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "author": [{"name": "foo", "type": "Person", "url": "http://example.org"}], + "version": "1.0.0", + } + + 
@settings(suppress_health_check=[HealthCheck.too_slow]) @given(json_document_strategy(keys=list(MAPPINGS["NpmMapping"].mapping))) # type: ignore def test_npm_adversarial(doc):