Page MenuHomeSoftware Heritage

D8384.diff
No OneTemporary

D8384.diff

diff --git a/swh/indexer/metadata_dictionary/maven.py b/swh/indexer/metadata_dictionary/maven.py
--- a/swh/indexer/metadata_dictionary/maven.py
+++ b/swh/indexer/metadata_dictionary/maven.py
@@ -75,6 +75,9 @@
and isinstance(artifact_id, str)
):
repo = os.path.join(url, *group_id.split("."), artifact_id)
+ if "${" in repo:
+ # Often used as templating in pom.xml files collected from VCSs
+ return
graph.add((root, SCHEMA.codeRepository, URIRef(repo)))
def normalize_groupId(self, id_):
diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py
--- a/swh/indexer/metadata_dictionary/npm.py
+++ b/swh/indexer/metadata_dictionary/npm.py
@@ -270,6 +270,12 @@
rdflib.term.URIRef('https://spdx.org/licenses/MIT')
"""
if isinstance(s, str):
+ if s.startswith("SEE LICENSE IN "):
+ # Very common pattern, because it is an example in the specification.
+ # It is followed by the filename; and the indexer architecture currently
+ # does not allow accessing that from metadata mappings.
+ # (Plus, a hypothetical license mapping would eventually pick it up)
+ return
return SPDX + s
def normalize_keywords(self, lst):
diff --git a/swh/indexer/tests/metadata_dictionary/test_maven.py b/swh/indexer/tests/metadata_dictionary/test_maven.py
--- a/swh/indexer/tests/metadata_dictionary/test_maven.py
+++ b/swh/indexer/tests/metadata_dictionary/test_maven.py
@@ -353,6 +353,47 @@
}
+def test_compute_metadata_maven_invalid_repository():
+ raw_content = b"""
+ <project>
+ <name>Maven Default Project</name>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.mycompany.app</groupId>
+ <artifactId>my-app</artifactId>
+ <version>1.2.3</version>
+ <repositories>
+ <repository>
+ <id>tcc-transaction-internal-releases</id>
+ <name>internal repository for released artifacts</name>
+ <url>${repo.internal.releases.url}</url>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ </repository>
+ </repositories>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
+ </project>"""
+ result = MAPPINGS["MavenMapping"]().translate(raw_content)
+ assert result == {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "name": "Maven Default Project",
+ "schema:identifier": "com.mycompany.app",
+ "version": "1.2.3",
+ "license": "https://www.apache.org/licenses/LICENSE-2.0.txt",
+ }
+
+
@settings(suppress_health_check=[HealthCheck.too_slow])
@given(
xml_document_strategy(
diff --git a/swh/indexer/tests/metadata_dictionary/test_npm.py b/swh/indexer/tests/metadata_dictionary/test_npm.py
--- a/swh/indexer/tests/metadata_dictionary/test_npm.py
+++ b/swh/indexer/tests/metadata_dictionary/test_npm.py
@@ -361,6 +361,24 @@
}
+def test_npm_invalid_licenses():
+ package_json = rb"""{
+ "version": "1.0.0",
+ "license": "SEE LICENSE IN LICENSE.md",
+ "author": {
+ "name": "foo",
+ "url": "http://example.org"
+ }
+}"""
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
+ assert result == {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [{"name": "foo", "type": "Person", "url": "http://example.org"}],
+ "version": "1.0.0",
+ }
+
+
@settings(suppress_health_check=[HealthCheck.too_slow])
@given(json_document_strategy(keys=list(MAPPINGS["NpmMapping"].mapping))) # type: ignore
def test_npm_adversarial(doc):

File Metadata

Mime Type
text/plain
Expires
Mon, Aug 18, 12:59 AM (3 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226262

Event Timeline