diff --git a/swh/indexer/data/nuget.csv b/swh/indexer/data/nuget.csv
new file mode 100644
--- /dev/null
+++ b/swh/indexer/data/nuget.csv
@@ -0,0 +1,68 @@
+Property,NuGet
+codeRepository,repository.url
+programmingLanguage,
+runtimePlatform,
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,releaseNotes
+softwareHelp,
+softwareRequirements,
+softwareVersion,
+storageRequirements,
+supportingData,
+author,authors
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,tags
+license,license/licenseUrl
+producer,
+provider,
+publisher,
+sponsor,
+version,version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,description/summary
+identifier,
+name,name
+sameAs,
+url,projectUrl
+relatedLink,
+givenName,
+familyName,
+email,
+affiliation,
+identifier,id
+name,
+address,
+type,
+id,
+softwareSuggestions,
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,
+referencePublication,
+readme,
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -8,7 +8,7 @@
import click
-from . import cff, codemeta, composer, dart, github, maven, npm, python, ruby
+from . import cff, codemeta, composer, dart, github, maven, npm, nuget, python, ruby
from .base import BaseExtrinsicMapping, BaseIntrinsicMapping, BaseMapping
INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
@@ -20,6 +20,7 @@
"PubMapping": dart.PubspecMapping,
"PythonPkginfoMapping": python.PythonPkginfoMapping,
"ComposerMapping": composer.ComposerMapping,
+ "NuGetMapping": nuget.NuGetMapping,
}
EXTRINSIC_MAPPINGS: Dict[str, Type[BaseExtrinsicMapping]] = {
diff --git a/swh/indexer/metadata_dictionary/nuget.py b/swh/indexer/metadata_dictionary/nuget.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/nuget.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+import re
+from typing import Any, Dict, List, Optional
+
+import xmltodict
+
+from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+from swh.indexer.storage.interface import Sha1
+
+from .base import DictMapping, DirectoryLsEntry, SingleFileIntrinsicMapping
+
+NUGET_TABLE_PATH = os.path.join(_DATA_DIR, "nuget.csv")
+
+# Read the NuGet crosswalk table once, when the module is imported.
+with open(NUGET_TABLE_PATH) as crosswalk_file:
+    CODEMETA_TERMS, NUGET_TABLE = _read_crosstable(crosswalk_file)
+
+
+class NuGetMapping(DictMapping, SingleFileIntrinsicMapping):
+    """
+    Dedicated class for NuGet (``.nuspec``) mapping and translation.
+
+    A ``.nuspec`` manifest is an XML document; the content of its
+    ``<package><metadata>`` element is parsed with ``xmltodict`` and handed to
+    ``_translate_dict``. With ``xmltodict``, XML attributes show up as
+    ``"@name"`` keys and the text of an element carrying attributes as
+    ``"#text"``, which is why the helpers below look up ``"@url"``,
+    ``"@type"`` and ``"#text"``.
+
+    NOTE(review): the ``normalize_<key>`` / ``translate_<key>`` methods are
+    presumably looked up by element name in the base class, which is not
+    visible from this module -- confirm against ``DictMapping``.
+    """
+
+    name = "nuget"
+    mapping = NUGET_TABLE["NuGet"]
+    # These two elements have no row in the crosswalk table, so they are
+    # mapped here to absolute schema.org URIs.
+    mapping["copyright"] = "http://schema.org/copyrightNotice"
+    mapping["language"] = "http://schema.org/inLanguage"
+    string_fields = [
+        "description",
+        "version",
+        "projectUrl",
+        "name",
+        "tags",
+        "license",
+        "licenseUrl",
+        "summary",
+        "copyright",
+        "language",
+    ]
+
+    @classmethod
+    def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
+        """Return the sha1 of the first entry named ``*.nuspec``, if any.
+
+        Args:
+            file_entries: directory entries to inspect.
+
+        Returns:
+            A single-element list with the sha1 of the first matching entry,
+            or an empty list when no entry name ends with ``.nuspec``.
+        """
+        for entry in file_entries:
+            if entry["name"].endswith(b".nuspec"):
+                return [entry["sha1"]]
+        return []
+
+    def translate(self, content: bytes) -> Optional[Dict[str, Any]]:
+        """Translate the raw content of a ``.nuspec`` file.
+
+        Args:
+            content: raw bytes of the manifest.
+
+        Returns:
+            The result of ``_translate_dict`` applied to the manifest's
+            ``<metadata>`` element, or ``None`` (after logging a warning) when
+            the content cannot be parsed or does not have the expected shape.
+        """
+        # Local import: keeps this method self-contained with respect to the
+        # module-level imports.
+        from xml.parsers.expat import ExpatError
+
+        try:
+            # Leading blanks must go: expat rejects an XML declaration that
+            # is not at the very start of the document.
+            document = xmltodict.parse(content.strip(b" \n "))
+        except (ExpatError, LookupError, ValueError):
+            # ExpatError: malformed XML; LookupError / ValueError: unknown or
+            # unsupported encoding declared by the document.
+            self.log.warning("Skipping ill-formed XML content: %s", content)
+            return None
+
+        package = document.get("package", {})
+        # An empty or text-only <package> is parsed as None or str, neither
+        # of which supports .get().
+        d = package.get("metadata", {}) if isinstance(package, dict) else None
+        if not isinstance(d, dict):
+            self.log.warning("Skipping ill-formed XML content: %s", content)
+            return None
+
+        return self._translate_dict(d)
+
+    def normalize_projectUrl(self, s):
+        """Wrap the project URL in an ``@id`` node; ``None`` if not a string."""
+        if isinstance(s, str):
+            return {"@id": s}
+        return None
+
+    def translate_repository(self, translated_metadata, v):
+        """Store the ``url`` attribute of ``<repository>`` under the term
+        mapped from ``repository.url``.
+
+        Nothing is written when ``v`` is not a dict or has no string ``@url``.
+        """
+        if not isinstance(v, dict):
+            return
+        url = v.get("@url")
+        if isinstance(url, str):
+            codemeta_key = self.mapping["repository.url"]
+            translated_metadata[codemeta_key] = {"@id": url}
+
+    def normalize_license(self, v):
+        """Turn a ``<license type="expression">`` element into SPDX URIs.
+
+        Only a plain identifier, or identifiers joined by ``or``, are handled;
+        an expression containing ``with``, ``and`` or parentheses yields
+        ``None``, as does any value that is not an ``expression`` license with
+        a text content.
+        """
+        if not isinstance(v, dict) or v.get("@type") != "expression":
+            return None
+        license_string = v.get("#text")
+        if not isinstance(license_string, str):
+            return None
+        if re.search(r" with |\(|\)| and ", license_string, re.IGNORECASE):
+            return None
+        return [
+            {"@id": "https://spdx.org/licenses/" + license_type.strip()}
+            for license_type in re.split(r" or ", license_string, flags=re.IGNORECASE)
+        ]
+
+    def normalize_licenseUrl(self, s):
+        """Wrap the license URL in an ``@id`` node; ``None`` if not a string."""
+        if isinstance(s, str):
+            return {"@id": s}
+        return None
+
+    def normalize_authors(self, s):
+        """Split a comma-separated author string into an ordered Person list.
+
+        Blank names (e.g. from a trailing comma) are dropped; ``None`` is
+        returned when ``s`` is not a string or holds no name at all.
+        """
+        if not isinstance(s, str):
+            return None
+        author_names = [a.strip() for a in s.split(",")]
+        authors = [
+            {"@type": SCHEMA_URI + "Person", SCHEMA_URI + "name": name}
+            for name in author_names
+            if name
+        ]
+        if not authors:
+            return None
+        return {"@list": authors}
+
+    def translate_releaseNotes(self, translated_metadata, s):
+        """Append string release notes under ``http://schema.org/releaseNotes``."""
+        if isinstance(s, str):
+            translated_metadata.setdefault("http://schema.org/releaseNotes", []).append(
+                s
+            )
+
+    def normalize_tags(self, s):
+        """Split the whitespace-delimited ``<tags>`` string into keywords.
+
+        Splitting on any run of whitespace avoids the empty keywords that a
+        split on single spaces yields for repeated blanks.
+        """
+        if isinstance(s, str):
+            return s.split()
+        return None
diff --git a/swh/indexer/tests/metadata_dictionary/test_nuget.py b/swh/indexer/tests/metadata_dictionary/test_nuget.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_nuget.py
@@ -0,0 +1,171 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.indexer.metadata_detector import detect_metadata
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_nuget():
+    """A complete ``.nuspec`` manifest yields the expected CodeMeta document."""
+    # The manifest deliberately starts with a newline and indentation, so the
+    # mapping has to cope with blanks before the XML declaration.
+    raw_content = b"""
+    <?xml version="1.0" encoding="utf-8"?>
+    <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+        <metadata>
+            <id>sample</id>
+            <version>1.2.3</version>
+            <authors>Kim Abercrombie, Franck Halmaert</authors>
+            <description>Sample exists only to show a sample .nuspec file.</description>
+            <summary>Summary is being deprecated. Use description instead.</summary>
+            <projectUrl>http://example.org/</projectUrl>
+            <repository type="git" url="https://github.com/NuGet/NuGet.Client.git"/>
+            <license type="expression">MIT</license>
+            <licenseUrl>https://raw.github.com/timrwood/moment/master/LICENSE</licenseUrl>
+            <dependencies>
+                <dependency id="another-package" version="3.0.0" />
+                <dependency id="yet-another-package" version="1.0.0" />
+            </dependencies>
+            <releaseNotes>
+            See the [changelog](https://github.com/httpie/httpie/releases/tag/3.2.0).
+            </releaseNotes>
+            <tags>python3 java cpp search-tag</tags>
+        </metadata>
+        <files>
+            <file src="bin/Debug/*.dll" target="lib" />
+        </files>
+    </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "author": [
+            {"type": "Person", "name": "Kim Abercrombie"},
+            {"type": "Person", "name": "Franck Halmaert"},
+        ],
+        "codeRepository": "https://github.com/NuGet/NuGet.Client.git",
+        "description": [
+            "Sample exists only to show a sample .nuspec file.",
+            "Summary is being deprecated. Use description instead.",
+        ],
+        "license": [
+            "https://spdx.org/licenses/MIT",
+            "https://raw.github.com/timrwood/moment/master/LICENSE",
+        ],
+        "url": "http://example.org/",
+        "version": "1.2.3",
+        "schema:releaseNotes": (
+            "See the [changelog](https://github.com/httpie/httpie/releases/tag/3.2.0)."
+        ),
+        "keywords": [
+            "python3",
+            "java",
+            "cpp",
+            "search-tag",
+        ],
+    }
+
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "filename",
+    [b"package_name.nuspec", b"number_5.nuspec", b"CAPS.nuspec", b"\x8anan.nuspec"],
+)
+def test_detect_metadata_package_nuspec(filename):
+    """A file named ``*.nuspec`` is the only one reported, under NuGetMapping."""
+    unrelated_entry = {
+        "sha1_git": b"abc",
+        "name": b"example.json",
+        "target": b"abc",
+        "length": 897,
+        "status": "visible",
+        "type": "file",
+        "perms": 33188,
+        "dir_id": b"dir_a",
+        "sha1": b"bcd",
+    }
+    nuspec_entry = {
+        "sha1_git": b"aab",
+        "name": filename,
+        "target": b"aab",
+        "length": 712,
+        "status": "visible",
+        "type": "file",
+        "perms": 33188,
+        "dir_id": b"dir_a",
+        "sha1": b"cde",
+    }
+
+    detected = detect_metadata([unrelated_entry, nuspec_entry])
+
+    # Only the sha1 of the .nuspec entry is expected.
+    assert {"NuGetMapping": [b"cde"]} == detected
+
+
+def test_normalize_license_multiple_licenses_or_delimiter():
+    """An ``or`` license expression yields one SPDX URI per alternative."""
+    raw_content = b"""
+    <?xml version="1.0" encoding="utf-8"?>
+    <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+        <metadata>
+            <license type="expression">BitTorrent-1.0 or GPL-3.0-with-GCC-exception</license>
+        </metadata>
+        <files>
+            <file src="bin/Debug/*.dll" target="lib" />
+        </files>
+    </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "license": [
+            "https://spdx.org/licenses/BitTorrent-1.0",
+            "https://spdx.org/licenses/GPL-3.0-with-GCC-exception",
+        ],
+    }
+
+    assert result == expected
+
+
+def test_normalize_license_unsupported_delimiter():
+    """A license expression containing parentheses produces no license."""
+    raw_content = b"""
+    <?xml version="1.0" encoding="utf-8"?>
+    <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+        <metadata>
+            <license type="expression">(MIT)</license>
+        </metadata>
+        <files>
+            <file src="bin/Debug/*.dll" target="lib" />
+        </files>
+    </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+    }
+
+    assert result == expected
+
+
+def test_copyrightNotice_absolute_uri_property():
+    """``copyright`` and ``language`` map to the schema.org terms set on the class."""
+    raw_content = b"""
+    <?xml version="1.0" encoding="utf-8"?>
+    <package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
+        <metadata>
+            <copyright>Copyright 2017-2022</copyright>
+            <language>en-us</language>
+        </metadata>
+        <files>
+            <file src="bin/Debug/*.dll" target="lib" />
+        </files>
+    </package>"""
+    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "schema:copyrightNotice": "Copyright 2017-2022",
+        "schema:inLanguage": "en-us",
+    }
+
+    assert result == expected
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -113,6 +113,7 @@
"github",
"maven",
"npm",
+ "nuget",
"pkg-info",
"pubspec",
"",