Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9348099
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
View Options
diff --git a/swh/indexer/metadata_dictionary/nuget.py b/swh/indexer/metadata_dictionary/nuget.py
index b2578ed..05b95d4 100644
--- a/swh/indexer/metadata_dictionary/nuget.py
+++ b/swh/indexer/metadata_dictionary/nuget.py
@@ -1,109 +1,105 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os.path
import re
from typing import Any, Dict, List, Optional
import xmltodict
from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
from swh.indexer.namespaces import SCHEMA
from swh.indexer.storage.interface import Sha1
from .base import BaseIntrinsicMapping, DictMapping, DirectoryLsEntry
# Location of the NuGet crosswalk CSV inside the package data directory.
NUGET_TABLE_PATH = os.path.join(_DATA_DIR, "nuget.csv")

# The table is read once, when this module is imported.
with open(NUGET_TABLE_PATH) as fd:
    CODEMETA_TERMS, NUGET_TABLE = _read_crosstable(fd)
class NuGetMapping(DictMapping, BaseIntrinsicMapping):
    """Mapping dedicated to translating NuGet (.nuspec) metadata."""

    name = "nuget"

    # NOTE: ``mapping`` is the very same object as NUGET_TABLE["NuGet"], so
    # the two assignments below also add these keys to the module-level table.
    mapping = NUGET_TABLE["NuGet"]
    mapping["copyright"] = "http://schema.org/copyrightNotice"
    mapping["language"] = "http://schema.org/inLanguage"

    # Presumably the fields the base mapping treats as plain strings --
    # TODO confirm against DictMapping.
    string_fields = [
        "description",
        "version",
        "projectUrl",
        "name",
        "tags",
        "license",
        "licenseUrl",
        "summary",
        "copyright",
        "language",
    ]
@classmethod
def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
    """Find the .nuspec file among directory entries.

    Args:
        file_entries: directory entries, each with a ``name`` (bytes) and
            a ``sha1``.

    Returns:
        A one-element list holding the ``sha1`` of the first entry whose
        name ends with ``.nuspec``, or an empty list if there is none.
    """
    nuspec_sha1s = (
        candidate["sha1"]
        for candidate in file_entries
        if candidate["name"].endswith(b".nuspec")
    )
    try:
        # Only the first match is reported; later entries are not examined.
        return [next(nuspec_sha1s)]
    except StopIteration:
        return []
def translate(self, content: bytes) -> Optional[Dict[str, Any]]:
    """Translate the raw content of a .nuspec file.

    Args:
        content: raw bytes of the .nuspec (XML) file.

    Returns:
        The result of ``self._translate_dict`` applied to the
        ``package/metadata`` element, or None (after logging a warning)
        when the content is not well-formed XML or when that element is
        not a mapping.
    """
    # Function-scope import so this method does not depend on a new
    # module-level import.
    import xml.parsers.expat

    try:
        d = xmltodict.parse(content)
    except xml.parsers.expat.ExpatError:
        # Malformed XML (including leading whitespace before the XML
        # declaration, or empty content) is skipped instead of crashing.
        self.log.warning("Skipping ill-formed XML content: %s", content)
        return None

    # Walk down step by step: an empty or text-only <package> element is
    # parsed as None or a string, on which a chained ``.get`` would raise.
    for key in ("package", "metadata"):
        if not isinstance(d, dict):
            break
        d = d.get(key, {})

    if not isinstance(d, dict):
        self.log.warning("Skipping ill-formed XML content: %s", content)
        return None
    return self._translate_dict(d)
def normalize_projectUrl(self, s):
    """Wrap a project URL in an ``{"@id": ...}`` object.

    Returns None when ``s`` is not a string.
    """
    if not isinstance(s, str):
        return None
    return {"@id": s}
def translate_repository(self, translated_metadata, v):
    """Record the repository URL of a ``<repository>`` element.

    Args:
        translated_metadata: dict being filled in; updated in place.
        v: parsed ``<repository>`` element; its ``@url`` key is read.

    Nothing is written when ``v`` is not a dict or when it has no string
    ``@url`` value.
    """
    if not isinstance(v, dict):
        return
    # ``.get`` rather than indexing: a <repository> element without a
    # ``url`` attribute must be ignored, not raise KeyError.
    url = v.get("@url")
    if isinstance(url, str):
        codemeta_key = self.mapping["repository.url"]
        translated_metadata[codemeta_key] = {"@id": url}
def normalize_license(self, v):
    """Turn a ``<license type="expression">`` element into SPDX URLs.

    Args:
        v: parsed ``<license>`` element; its ``@type`` and ``#text`` keys
            are read.

    Returns:
        A list of ``{"@id": "https://spdx.org/licenses/<id>"}`` objects,
        one per identifier separated by ``or`` (case-insensitive).
        None when ``v`` is not a dict, is not of type ``expression``, has
        no text, contains no identifier, or uses ``with``, ``and`` or
        parentheses, which are not handled here.
    """
    # ``.get`` rather than indexing: a <license> element may lack the
    # ``type`` attribute or have no text content.
    if not isinstance(v, dict) or v.get("@type") != "expression":
        return None
    license_string = v.get("#text")
    if not isinstance(license_string, str):
        return None

    if re.search(r" with |\(|\)| and ", license_string, re.IGNORECASE):
        return None

    identifiers = (
        license_type.strip()
        for license_type in re.split(r" or ", license_string, flags=re.IGNORECASE)
    )
    # Skip empty identifiers (e.g. a dangling "or") so that a bare
    # "https://spdx.org/licenses/" URL is never produced.
    licenses = [
        {"@id": "https://spdx.org/licenses/" + identifier}
        for identifier in identifiers
        if identifier
    ]
    return licenses or None
def normalize_licenseUrl(self, s):
    """Wrap a license URL in an ``{"@id": ...}`` object.

    Returns None when ``s`` is not a string.
    """
    return {"@id": s} if isinstance(s, str) else None
def normalize_authors(self, s):
    """Split a comma-separated author string into Person entries.

    Args:
        s: content of the ``<authors>`` element.

    Returns:
        ``{"@list": [...]}`` with one ``SCHEMA.Person`` entry per author
        name, in the order given. None when ``s`` is not a string or
        contains no author name.
    """
    if not isinstance(s, str):
        return None
    # Drop empty names so that a trailing or doubled comma does not
    # produce a Person with an empty name.
    author_names = [name for name in (a.strip() for a in s.split(",")) if name]
    if not author_names:
        return None
    authors = [{"@type": SCHEMA.Person, SCHEMA.name: name} for name in author_names]
    return {"@list": authors}
def translate_releaseNotes(self, translated_metadata, s):
    """Append release notes text to ``translated_metadata`` in place.

    The text is added to the list stored under
    ``http://schema.org/releaseNotes``, which is created if missing.
    Non-string values are ignored.
    """
    if not isinstance(s, str):
        return
    release_notes = translated_metadata.setdefault(
        "http://schema.org/releaseNotes", []
    )
    release_notes.append(s)
def normalize_tags(self, s):
    """Split the content of ``<tags>`` into a list of tags.

    Args:
        s: whitespace-delimited tags.

    Returns:
        The list of tags, or None when ``s`` is not a string or contains
        no tag at all.
    """
    if not isinstance(s, str):
        return None
    # ``split()`` without argument splits on any run of whitespace, so
    # repeated spaces, newlines or surrounding blanks never yield empty tags.
    tags = s.split()
    return tags or None
diff --git a/swh/indexer/tests/metadata_dictionary/test_nuget.py b/swh/indexer/tests/metadata_dictionary/test_nuget.py
index 58e01bc..f34c550 100644
--- a/swh/indexer/tests/metadata_dictionary/test_nuget.py
+++ b/swh/indexer/tests/metadata_dictionary/test_nuget.py
@@ -1,171 +1,167 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
from swh.indexer.metadata_detector import detect_metadata
from swh.indexer.metadata_dictionary import MAPPINGS
def test_compute_metadata_nuget():
    """Translate a complete .nuspec document and check the whole output."""
    nuspec = b"""<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata>
<id>sample</id>
<version>1.2.3</version>
<authors>Kim Abercrombie, Franck Halmaert</authors>
<description>Sample exists only to show a sample .nuspec file.</description>
<summary>Summary is being deprecated. Use description instead.</summary>
<projectUrl>http://example.org/</projectUrl>
<repository type="git" url="https://github.com/NuGet/NuGet.Client.git"/>
<license type="expression">MIT</license>
<licenseUrl>https://raw.github.com/timrwood/moment/master/LICENSE</licenseUrl>
<dependencies>
<dependency id="another-package" version="3.0.0" />
<dependency id="yet-another-package" version="1.0.0" />
</dependencies>
<releaseNotes>
See the [changelog](https://github.com/httpie/httpie/releases/tag/3.2.0).
</releaseNotes>
<tags>python3 java cpp search-tag</tags>
</metadata>
<files>
<file src="bin\\Debug\\*.dll" target="lib" />
</files>
</package>"""

    nuget_mapping = MAPPINGS["NuGetMapping"]()
    translated = nuget_mapping.translate(nuspec)

    assert translated == {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "type": "SoftwareSourceCode",
        "author": [
            {"type": "Person", "name": "Kim Abercrombie"},
            {"type": "Person", "name": "Franck Halmaert"},
        ],
        "codeRepository": "https://github.com/NuGet/NuGet.Client.git",
        "description": [
            "Sample exists only to show a sample .nuspec file.",
            "Summary is being deprecated. Use description instead.",
        ],
        "license": [
            "https://spdx.org/licenses/MIT",
            "https://raw.github.com/timrwood/moment/master/LICENSE",
        ],
        "url": "http://example.org/",
        "version": "1.2.3",
        "schema:releaseNotes": (
            "See the [changelog](https://github.com/httpie/httpie/releases/tag/3.2.0)."
        ),
        "keywords": [
            "python3",
            "java",
            "cpp",
            "search-tag",
        ],
    }
@pytest.mark.parametrize(
    "filename",
    [b"package_name.nuspec", b"number_5.nuspec", b"CAPS.nuspec", b"\x8anan.nuspec"],
)
def test_detect_metadata_package_nuspec(filename):
    """A ``*.nuspec`` entry is reported under NuGetMapping by its sha1."""
    # Entry that must not be picked up: its name does not end in .nuspec.
    unrelated_entry = {
        "sha1_git": b"abc",
        "name": b"example.json",
        "target": b"abc",
        "length": 897,
        "status": "visible",
        "type": "file",
        "perms": 33188,
        "dir_id": b"dir_a",
        "sha1": b"bcd",
    }
    # Entry under test, named after the parametrized filename.
    nuspec_entry = {
        "sha1_git": b"aab",
        "name": filename,
        "target": b"aab",
        "length": 712,
        "status": "visible",
        "type": "file",
        "perms": 33188,
        "dir_id": b"dir_a",
        "sha1": b"cde",
    }

    detected = detect_metadata([unrelated_entry, nuspec_entry])

    assert {"NuGetMapping": [b"cde"]} == detected
def test_normalize_license_multiple_licenses_or_delimiter():
    """A license expression joined with ``or`` yields one URL per license."""
    # Single assignment: the former ``raw_content = raw_content = ...`` bound
    # the same name twice for no effect.
    raw_content = b"""<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata>
<license type="expression">BitTorrent-1.0 or GPL-3.0-with-GCC-exception</license>
</metadata>
<files>
<file src="bin\\Debug\\*.dll" target="lib" />
</files>
</package>"""
    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
    expected = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "type": "SoftwareSourceCode",
        "license": [
            "https://spdx.org/licenses/BitTorrent-1.0",
            "https://spdx.org/licenses/GPL-3.0-with-GCC-exception",
        ],
    }
    assert result == expected
def test_normalize_license_unsupported_delimiter():
    """A parenthesized license expression yields no license in the output."""
    # Single assignment: the former ``raw_content = raw_content = ...`` bound
    # the same name twice for no effect.
    raw_content = b"""<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata>
<license type="expression">(MIT)</license>
</metadata>
<files>
<file src="bin\\Debug\\*.dll" target="lib" />
</files>
</package>"""
    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
    expected = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "type": "SoftwareSourceCode",
    }
    assert result == expected
def test_copyrightNotice_absolute_uri_property():
    """``copyright`` and ``language`` come out as ``schema:``-prefixed keys."""
    # Single assignment: the former ``raw_content = raw_content = ...`` bound
    # the same name twice for no effect.
    raw_content = b"""<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata>
<copyright>Copyright 2017-2022</copyright>
<language>en-us</language>
</metadata>
<files>
<file src="bin\\Debug\\*.dll" target="lib" />
</files>
</package>"""
    result = MAPPINGS["NuGetMapping"]().translate(raw_content)
    expected = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "type": "SoftwareSourceCode",
        "schema:copyrightNotice": "Copyright 2017-2022",
        "schema:inLanguage": "en-us",
    }
    assert result == expected
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 6:12 PM (4 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3268613
Attached To
rDCIDX Metadata indexer
Event Timeline
Log In to Comment