Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066254
D8144.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
D8144.diff
View Options
diff --git a/swh/indexer/data/nuget.csv b/swh/indexer/data/nuget.csv
new file mode 100644
--- /dev/null
+++ b/swh/indexer/data/nuget.csv
@@ -0,0 +1,68 @@
+Property,NuGet
+codeRepository,repository.url
+programmingLanguage,
+runtimePlatform,
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,releaseNotes
+softwareHelp,
+softwareRequirements,
+softwareVersion,
+storageRequirements,
+supportingData,
+author,authors
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,tags
+license,license/licenseUrl
+producer,
+provider,
+publisher,
+sponsor,
+version,version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,description/summary
+identifier,
+name,name
+sameAs,
+url,projectUrl
+relatedLink,
+givenName,
+familyName,
+email,
+affiliation,
+identifier,id
+name,
+address,
+type,
+id,
+softwareSuggestions,
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,
+referencePublication,
+readme,
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -8,7 +8,7 @@
import click
-from . import cff, codemeta, composer, dart, github, maven, npm, python, ruby
+from . import cff, codemeta, composer, dart, github, maven, npm, nuget, python, ruby
from .base import BaseExtrinsicMapping, BaseIntrinsicMapping, BaseMapping
INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
@@ -20,6 +20,7 @@
"PubMapping": dart.PubspecMapping,
"PythonPkginfoMapping": python.PythonPkginfoMapping,
"ComposerMapping": composer.ComposerMapping,
+ "NuGetMapping": nuget.NuGetMapping,
}
EXTRINSIC_MAPPINGS: Dict[str, Type[BaseExtrinsicMapping]] = {
diff --git a/swh/indexer/metadata_dictionary/nuget.py b/swh/indexer/metadata_dictionary/nuget.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/nuget.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+import re
+from typing import Any, Dict, List, Optional
+import xml.parsers.expat
+
+import xmltodict
+
+from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+from swh.indexer.storage.interface import Sha1
+
+from .base import DictMapping, DirectoryLsEntry, SingleFileIntrinsicMapping
+
+NUGET_TABLE_PATH = os.path.join(_DATA_DIR, "nuget.csv")
+
+# Load the NuGet crosswalk table once, when the module is imported.
+with open(NUGET_TABLE_PATH) as fd:
+    (CODEMETA_TERMS, NUGET_TABLE) = _read_crosstable(fd)
+
+
+class NuGetMapping(DictMapping, SingleFileIntrinsicMapping):
+    """Translate NuGet ``.nuspec`` manifests to CodeMeta.
+
+    The manifest is parsed with ``xmltodict`` and the content of its
+    ``<package><metadata>`` element is handed to ``_translate_dict``. The
+    ``normalize_<field>`` and ``translate_<field>`` methods below handle the
+    fields that need more than a plain copy (presumably looked up by name by
+    the ``DictMapping`` base class, which lives outside this module).
+    """
+
+    name = "nuget"
+    # Copy the crosswalk column so that the two extra terms added below do not
+    # leak into the shared module-level NUGET_TABLE.
+    mapping = dict(NUGET_TABLE["NuGet"])
+    mapping["copyright"] = "http://schema.org/copyrightNotice"
+    mapping["language"] = "http://schema.org/inLanguage"
+    string_fields = [
+        "description",
+        "version",
+        "projectUrl",
+        "name",
+        "tags",
+        "license",
+        "licenseUrl",
+        "summary",
+        "copyright",
+        "language",
+    ]
+
+    @classmethod
+    def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
+        """Return the sha1 of the first ``*.nuspec`` entry, or ``[]`` if none."""
+        for entry in file_entries:
+            if entry["name"].endswith(b".nuspec"):
+                return [entry["sha1"]]
+        return []
+
+    def translate(self, content: bytes) -> Optional[Dict[str, Any]]:
+        """Translate the raw bytes of a ``.nuspec`` file.
+
+        Returns ``None`` (after logging a warning) when the content is not
+        well-formed XML or when ``<package>``/``<metadata>`` is not an element
+        with children, instead of raising.
+        """
+        try:
+            # bytes.strip() also drops "\r" and tabs, so an XML declaration
+            # preceded by Windows line endings is still at the very start.
+            parsed = xmltodict.parse(content.strip())
+        except xml.parsers.expat.ExpatError:
+            self.log.warning("Skipping ill-formed XML content: %s", content)
+            return None
+
+        # An empty element (e.g. ``<package/>``) is parsed as None, on which
+        # a chained ``.get()`` would raise AttributeError.
+        package = parsed.get("package", {})
+        d = package.get("metadata", {}) if isinstance(package, dict) else None
+        if not isinstance(d, dict):
+            self.log.warning("Skipping ill-formed XML content: %s", content)
+            return None
+
+        return self._translate_dict(d)
+
+    def normalize_projectUrl(self, s):
+        """Wrap the project URL in a JSON-LD ``@id`` node; ignore non-strings."""
+        if isinstance(s, str):
+            return {"@id": s}
+
+    def translate_repository(self, translated_metadata, v):
+        """Store the ``url`` attribute of ``<repository>`` as the code repository.
+
+        Elements without a ``url`` attribute are ignored instead of raising
+        ``KeyError``.
+        """
+        if isinstance(v, dict) and isinstance(v.get("@url"), str):
+            codemeta_key = self.mapping["repository.url"]
+            translated_metadata[codemeta_key] = {"@id": v["@url"]}
+
+    def normalize_license(self, v):
+        """Turn a ``<license type="expression">`` into SPDX license URIs.
+
+        Only bare identifiers, optionally joined by ``OR``, are supported:
+        expressions using ``WITH``, ``AND`` or parentheses yield ``None``, and
+        so do elements lacking the ``type`` attribute or a text body.
+        """
+        if isinstance(v, dict) and v.get("@type") == "expression":
+            license_string = v.get("#text")
+            if not isinstance(license_string, str):
+                return None
+            if re.search(r" with |\(|\)| and ", license_string, re.IGNORECASE):
+                return None
+            return [
+                {"@id": "https://spdx.org/licenses/" + license_type.strip()}
+                for license_type in re.split(
+                    r" or ", license_string, flags=re.IGNORECASE
+                )
+            ]
+        return None
+
+    def normalize_licenseUrl(self, s):
+        """Wrap the license URL in a JSON-LD ``@id`` node; ignore non-strings."""
+        if isinstance(s, str):
+            return {"@id": s}
+
+    def normalize_authors(self, s):
+        """Split a comma-separated author string into an ordered Person list.
+
+        Blank entries (e.g. from a trailing comma) are dropped.
+        """
+        if isinstance(s, str):
+            author_names = [a.strip() for a in s.split(",") if a.strip()]
+            authors = [
+                {"@type": SCHEMA_URI + "Person", SCHEMA_URI + "name": name}
+                for name in author_names
+            ]
+            return {"@list": authors}
+
+    def translate_releaseNotes(self, translated_metadata, s):
+        """Append the release notes text to ``http://schema.org/releaseNotes``."""
+        if isinstance(s, str):
+            translated_metadata.setdefault("http://schema.org/releaseNotes", []).append(
+                s
+            )
+
+    def normalize_tags(self, s):
+        """Split the whitespace-delimited ``<tags>`` string into keywords.
+
+        ``str.split()`` without argument is used so that repeated spaces or
+        newlines do not produce empty keywords.
+        """
+        if isinstance(s, str):
+            return s.split()
diff --git a/swh/indexer/tests/metadata_dictionary/test_nuget.py b/swh/indexer/tests/metadata_dictionary/test_nuget.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_nuget.py
@@ -0,0 +1,176 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.indexer.metadata_detector import detect_metadata
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_nuget():
+    """A complete .nuspec is translated to the expected CodeMeta document."""
+    raw_content = b"""
+ <?xml version="1.0" encoding="utf-8"?>
+ <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+ <metadata>
+ <id>sample</id>
+ <version>1.2.3</version>
+ <authors>Kim Abercrombie, Franck Halmaert</authors>
+ <description>Sample exists only to show a sample .nuspec file.</description>
+ <summary>Summary is being deprecated. Use description instead.</summary>
+ <projectUrl>http://example.org/</projectUrl>
+ <repository type="git" url="https://github.com/NuGet/NuGet.Client.git"/>
+ <license type="expression">MIT</license>
+ <licenseUrl>https://raw.github.com/timrwood/moment/master/LICENSE</licenseUrl>
+ <dependencies>
+ <dependency id="another-package" version="3.0.0" />
+ <dependency id="yet-another-package" version="1.0.0" />
+ </dependencies>
+ <releaseNotes>
+ See the [changelog](https://github.com/httpie/httpie/releases/tag/3.2.0).
+ </releaseNotes>
+ <tags>python3 java cpp search-tag</tags>
+ </metadata>
+ <files>
+ <file src="bin\\Debug\\*.dll" target="lib" />
+ </files>
+ </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "author": [
+            {"type": "Person", "name": "Kim Abercrombie"},
+            {"type": "Person", "name": "Franck Halmaert"},
+        ],
+        "codeRepository": "https://github.com/NuGet/NuGet.Client.git",
+        "description": [
+            "Sample exists only to show a sample .nuspec file.",
+            "Summary is being deprecated. Use description instead.",
+        ],
+        "license": [
+            "https://spdx.org/licenses/MIT",
+            "https://raw.github.com/timrwood/moment/master/LICENSE",
+        ],
+        "url": "http://example.org/",
+        "version": "1.2.3",
+        "schema:releaseNotes": (
+            "See the [changelog](https://github.com/httpie/httpie/releases/tag/3.2.0)."
+        ),
+        "keywords": [
+            "python3",
+            "java",
+            "cpp",
+            "search-tag",
+        ],
+    }
+
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "filename",
+    [b"package_name.nuspec", b"number_5.nuspec", b"CAPS.nuspec", b"\x8anan.nuspec"],
+)
+def test_detect_metadata_package_nuspec(filename):
+    """Any file name ending in ``.nuspec`` is detected as NuGet metadata."""
+    df = [
+        {
+            "sha1_git": b"abc",
+            "name": b"example.json",
+            "target": b"abc",
+            "length": 897,
+            "status": "visible",
+            "type": "file",
+            "perms": 33188,
+            "dir_id": b"dir_a",
+            "sha1": b"bcd",
+        },
+        {
+            "sha1_git": b"aab",
+            "name": filename,
+            "target": b"aab",
+            "length": 712,
+            "status": "visible",
+            "type": "file",
+            "perms": 33188,
+            "dir_id": b"dir_a",
+            "sha1": b"cde",
+        },
+    ]
+    results = detect_metadata(df)
+
+    expected_results = {"NuGetMapping": [b"cde"]}
+    assert expected_results == results
+
+
+def test_normalize_license_multiple_licenses_or_delimiter():
+    """An ``or``-joined license expression yields one SPDX URI per license."""
+    raw_content = b"""
+ <?xml version="1.0" encoding="utf-8"?>
+ <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+ <metadata>
+ <license type="expression">BitTorrent-1.0 or GPL-3.0-with-GCC-exception</license>
+ </metadata>
+ <files>
+ <file src="bin\\Debug\\*.dll" target="lib" />
+ </files>
+ </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "license": [
+            "https://spdx.org/licenses/BitTorrent-1.0",
+            "https://spdx.org/licenses/GPL-3.0-with-GCC-exception",
+        ],
+    }
+
+    assert result == expected
+
+
+def test_normalize_license_unsupported_delimiter():
+    """A license expression containing parentheses is dropped from the output."""
+    raw_content = b"""
+ <?xml version="1.0" encoding="utf-8"?>
+ <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+ <metadata>
+ <license type="expression">(MIT)</license>
+ </metadata>
+ <files>
+ <file src="bin\\Debug\\*.dll" target="lib" />
+ </files>
+ </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+    }
+
+    assert result == expected
+
+
+def test_copyrightNotice_absolute_uri_property():
+    """``copyright`` and ``language`` map to the schema.org terms given by URI."""
+    raw_content = b"""
+ <?xml version="1.0" encoding="utf-8"?>
+ <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+ <metadata>
+ <copyright>Copyright 2017-2022</copyright>
+ <language>en-us</language>
+ </metadata>
+ <files>
+ <file src="bin\\Debug\\*.dll" target="lib" />
+ </files>
+ </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "schema:copyrightNotice": "Copyright 2017-2022",
+        "schema:inLanguage": "en-us",
+    }
+
+    assert result == expected
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -113,6 +113,7 @@
"github",
"maven",
"npm",
+ "nuget",
"pkg-info",
"pubspec",
"",
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 4:15 AM (19 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224154
Attached To
D8144: Add NuGet Mapping
Event Timeline
Log In to Comment