Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9697818
D8384.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D8384.diff
View Options
diff --git a/swh/indexer/metadata_dictionary/maven.py b/swh/indexer/metadata_dictionary/maven.py
--- a/swh/indexer/metadata_dictionary/maven.py
+++ b/swh/indexer/metadata_dictionary/maven.py
@@ -75,6 +75,9 @@
and isinstance(artifact_id, str)
):
repo = os.path.join(url, *group_id.split("."), artifact_id)
+ if "${" in repo:
+ # Often used as templating in pom.xml files collected from VCSs
+ return
graph.add((root, SCHEMA.codeRepository, URIRef(repo)))
def normalize_groupId(self, id_):
diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py
--- a/swh/indexer/metadata_dictionary/npm.py
+++ b/swh/indexer/metadata_dictionary/npm.py
@@ -270,6 +270,12 @@
rdflib.term.URIRef('https://spdx.org/licenses/MIT')
"""
if isinstance(s, str):
+ if s.startswith("SEE LICENSE IN "):
+ # Very common pattern, because it is an example in the specification.
+ # It is followed by the filename; and the indexer architecture currently
+ # does not allow accessing that from metadata mappings.
+ # (Plus, a hypothetical license mapping would eventually pick it up)
+ return
return SPDX + s
def normalize_keywords(self, lst):
diff --git a/swh/indexer/tests/metadata_dictionary/test_maven.py b/swh/indexer/tests/metadata_dictionary/test_maven.py
--- a/swh/indexer/tests/metadata_dictionary/test_maven.py
+++ b/swh/indexer/tests/metadata_dictionary/test_maven.py
@@ -353,6 +353,47 @@
}
+def test_compute_metadata_maven_invalid_repository():
+ raw_content = b"""
+ <project>
+ <name>Maven Default Project</name>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.mycompany.app</groupId>
+ <artifactId>my-app</artifactId>
+ <version>1.2.3</version>
+ <repositories>
+ <repository>
+ <id>tcc-transaction-internal-releases</id>
+ <name>internal repository for released artifacts</name>
+ <url>${repo.internal.releases.url}</url>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ </repository>
+ </repositories>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
+ </project>"""
+ result = MAPPINGS["MavenMapping"]().translate(raw_content)
+ assert result == {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "name": "Maven Default Project",
+ "schema:identifier": "com.mycompany.app",
+ "version": "1.2.3",
+ "license": "https://www.apache.org/licenses/LICENSE-2.0.txt",
+ }
+
+
@settings(suppress_health_check=[HealthCheck.too_slow])
@given(
xml_document_strategy(
diff --git a/swh/indexer/tests/metadata_dictionary/test_npm.py b/swh/indexer/tests/metadata_dictionary/test_npm.py
--- a/swh/indexer/tests/metadata_dictionary/test_npm.py
+++ b/swh/indexer/tests/metadata_dictionary/test_npm.py
@@ -361,6 +361,24 @@
}
+def test_npm_invalid_licenses():
+ package_json = rb"""{
+ "version": "1.0.0",
+ "license": "SEE LICENSE IN LICENSE.md",
+ "author": {
+ "name": "foo",
+ "url": "http://example.org"
+ }
+}"""
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
+ assert result == {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [{"name": "foo", "type": "Person", "url": "http://example.org"}],
+ "version": "1.0.0",
+ }
+
+
@settings(suppress_health_check=[HealthCheck.too_slow])
@given(json_document_strategy(keys=list(MAPPINGS["NpmMapping"].mapping))) # type: ignore
def test_npm_adversarial(doc):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Aug 18, 12:59 AM (3 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226262
Attached To
D8384: npm, maven: ignore blatantly invalid licenses
Event Timeline
Log In to Comment