diff --git a/swh/indexer/data/nuget.csv b/swh/indexer/data/nuget.csv new file mode 100644 --- /dev/null +++ b/swh/indexer/data/nuget.csv @@ -0,0 +1,67 @@ +Property,NuGet +codeRepository,repository.url +programmingLanguage, +runtimePlatform, +targetProduct, +applicationCategory, +applicationSubCategory, +downloadUrl, +fileSize, +installUrl, +memoryRequirements, +operatingSystem, +permissions, +processorRequirements, +releaseNotes, +softwareHelp, +softwareRequirements, +softwareVersion, +storageRequirements, +supportingData, +author,authors +citation, +contributor, +copyrightHolder, +copyrightYear, +dateCreated, +dateModified, +datePublished, +editor, +encoding, +fileFormat, +funder, +keywords, +license,license +producer, +provider, +publisher, +sponsor, +version,version +isAccessibleForFree, +isPartOf, +hasPart, +position, +description,description +identifier, +name,name +sameAs, +url,projectUrl +relatedLink, +givenName, +familyName, +email, +affiliation, +identifier,id +name, +address, +type, +id, +softwareSuggestions, +maintainer, +contIntegration, +buildInstructions, +developmentStatus, +embargoDate, +funding, +issueTracker, +referencePublication, diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py --- a/swh/indexer/metadata_dictionary/__init__.py +++ b/swh/indexer/metadata_dictionary/__init__.py @@ -8,7 +8,7 @@ import click -from . import cff, codemeta, composer, dart, github, maven, npm, python, ruby +from . 
class NuGetMapping(DictMapping, SingleFileIntrinsicMapping):
    """Translate the metadata of a NuGet ``.nuspec`` XML manifest.

    The manifest is parsed with ``xmltodict``; the content of its
    ``<package><metadata>`` element is then translated key by key using the
    ``NuGet`` column of the crosswalk table, with the ``normalize_*`` and
    ``translate_*`` methods below handling the keys that need special care.
    """

    name = "nuget"
    filename = b".nuspec"
    mapping = NUGET_TABLE["NuGet"]
    string_fields = [
        "description",
        "version",
        "projectUrl",
        "name",
    ]

    def translate(self, content: bytes) -> Optional[Dict[str, Any]]:
        """Translate the raw bytes of a ``.nuspec`` file.

        Args:
            content: raw content of the manifest.

        Returns:
            The normalized translation, or ``None`` when the content is not
            well-formed XML or has no usable ``<package><metadata>`` element.
        """
        # Imported here so the class does not rely on a module-level import
        # that the rest of the file does not already provide.
        from xml.parsers.expat import ExpatError

        try:
            document = xmltodict.parse(content.strip(b" \n "))
        except ExpatError:
            # xmltodict reports ill-formed XML by raising, not by returning
            # a non-dict value.
            self.log.warning("Skipping ill-formed XML content: %s", content)
            return None

        package = document.get("package")
        if not isinstance(package, dict):
            # Root element is not <package>, or it is empty / text-only;
            # chaining .get() on it would raise AttributeError.
            self.log.warning("Skipping ill-formed XML content: %s", content)
            return None

        # An empty <metadata/> element is parsed as None: treat it as empty.
        d = package.get("metadata", {}) or {}
        if not isinstance(d, dict):
            self.log.warning("Skipping ill-formed XML content: %s", content)
            return None

        metadata = self._translate_dict(d, normalize=False)

        return self.normalize_translation(metadata)

    def normalize_projectUrl(self, s):
        """Wrap a ``projectUrl`` string in an ``@id`` node.

        Returns ``None`` (implicitly) when the value is not a string.
        """
        if isinstance(s, str):
            return {"@id": s}

    def translate_repository(self, translated_metadata, v):
        """Store the ``url`` attribute of ``<repository>`` in the translation.

        The value is written under the key the crosswalk table associates
        with ``repository.url``; ``translated_metadata`` is updated in place
        and also returned. Nothing is written when the attribute is missing
        or is not a string.
        """
        if isinstance(v, dict) and isinstance(v.get("@url"), str):
            codemeta_key = self.mapping["repository.url"]
            translated_metadata[codemeta_key] = {"@id": v.get("@url")}
        return translated_metadata

    def normalize_license(self, v):
        """Turn a ``<license type="expression">`` element into an SPDX URL node.

        Returns ``None`` (implicitly) for other license types and for an
        expression element without text.
        """
        if isinstance(v, dict) and v.get("@type") == "expression":
            license_type = v.get("#text")
            # An empty element has no "#text"; concatenating None would raise.
            if isinstance(license_type, str):
                # NOTE(review): a compound expression such as "MIT OR X" is
                # appended verbatim; confirm whether that is acceptable.
                return {"@id": "https://spdx.org/licenses/" + license_type}

    def normalize_authors(self, s):
        """Split a comma-separated ``authors`` string into a list of persons.

        Names are stripped of surrounding whitespace and empty names (for
        instance from a trailing comma) are dropped. Returns ``None`` when
        the value is not a string or contains no name at all.
        """
        if not isinstance(s, str):
            return None
        author_names = [a.strip() for a in s.split(",")]
        authors = [
            {"@type": SCHEMA_URI + "Person", SCHEMA_URI + "name": name}
            for name in author_names
            if name
        ]
        if not authors:
            return None
        return {"@list": authors}
+ en-US + http://example.org/ + + MIT + + + + + + + + + """ + result = MAPPINGS["NuGetMapping"]().translate(raw_content) + expected = { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "author": [ + {"type": "Person", "name": "Kim Abercrombie"}, + {"type": "Person", "name": "Franck Halmaert"}, + ], + "codeRepository": "https://github.com/NuGet/NuGet.Client.git", + "description": "Sample exists only to show a sample .nuspec file.", + "license": "https://spdx.org/licenses/MIT", + "url": "http://example.org/", + "version": "1.2.3", + } + + assert result == expected diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py --- a/swh/indexer/tests/test_cli.py +++ b/swh/indexer/tests/test_cli.py @@ -100,6 +100,7 @@ "github", "maven", "npm", + "nuget", "pkg-info", "pubspec", "",