diff --git a/swh/indexer/data/pub.csv b/swh/indexer/data/pub.csv
new file mode 100644
--- /dev/null
+++ b/swh/indexer/data/pub.csv
@@ -0,0 +1,68 @@
+Property,Pub
+codeRepository,repository
+programmingLanguage,
+runtimePlatform,
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,
+softwareHelp,
+softwareRequirements,
+softwareVersion,version
+storageRequirements,
+supportingData,
+author,author
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,keywords
+license,license
+producer,
+provider,
+publisher,
+sponsor,
+version,version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,description
+identifier,
+name,name
+sameAs,
+url,homepage
+relatedLink,
+givenName,
+familyName,
+email,
+affiliation,
+identifier,
+name,
+address,
+type,
+id,
+softwareSuggestions,
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,issue_tracker
+referencePublication,
+readme,
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -7,7 +7,7 @@
 
 import click
 
-from . import cff, codemeta, composer, github, maven, npm, python, ruby
+from . import cff, codemeta, composer, github, maven, npm, pub, python, ruby
 
 MAPPINGS = {
     "CffMapping": cff.CffMapping,
@@ -16,6 +16,7 @@
     "GitHubMapping": github.GitHubMapping,
     "MavenMapping": maven.MavenMapping,
     "NpmMapping": npm.NpmMapping,
+    "PubMapping": pub.PubMapping,
     "PythonPkginfoMapping": python.PythonPkginfoMapping,
     "ComposerMapping": composer.ComposerMapping,
 }
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -8,8 +8,9 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
 
 from typing_extensions import TypedDict
+import yaml
 
-from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values
+from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, SCHEMA_URI, compact, merge_values
 from swh.indexer.storage.interface import Sha1
 
 
@@ -227,3 +228,49 @@
         if isinstance(content_dict, dict):
             return self._translate_dict(content_dict)
         return None
+
+
+class SafeLoader(yaml.SafeLoader):
+    """``yaml.SafeLoader`` with the implicit timestamp resolver removed.
+
+    Every resolver tagged ``tag:yaml.org,2002:timestamp`` is filtered out,
+    so no scalar is implicitly resolved to a timestamp while loading.
+    """
+
+    yaml_implicit_resolvers = {
+        k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
+        for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
+    }
+
+
+class YamlMapping(DictMapping, SingleFileMapping):
+    """Base class for all mappings that use Yaml data as input."""
+
+    def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
+        """Translate the raw content of a YAML file.
+
+        Args:
+            raw_content: undecoded content of the file
+
+        Returns:
+            the translated dict with its ``@context`` set, or None when the
+            content cannot be decoded, is not valid YAML, or does not hold
+            a mapping at its top level.
+        """
+        try:
+            raw_content_string: str = raw_content.decode()
+        except UnicodeDecodeError:
+            return None
+
+        try:
+            content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
+        except yaml.YAMLError:
+            # Base class of ScannerError, ParserError, ConstructorError, ...
+            return None
+
+        if isinstance(content_dict, dict):
+            metadata = self._translate_dict(content_dict)
+            metadata["@context"] = CODEMETA_CONTEXT_URL
+            return metadata
+
+        return None
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -2,9 +2,9 @@
 
 import yaml
 
-from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, CROSSWALK_TABLE, SCHEMA_URI
+from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
 
-from .base import DictMapping, SingleFileMapping
+from .base import YamlMapping
 
 
 class SafeLoader(yaml.SafeLoader):
@@ -14,7 +14,7 @@
     }
 
 
-class CffMapping(DictMapping, SingleFileMapping):
+class CffMapping(YamlMapping):
     """Dedicated class for Citation (CITATION.cff) mapping and translation"""
 
     name = "cff"
@@ -22,20 +22,6 @@
     mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
     string_fields = ["keywords", "license", "abstract", "version", "doi"]
 
-    def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
-        raw_content_string: str = raw_content.decode()
-        try:
-            content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
-        except yaml.scanner.ScannerError:
-            return None
-
-        if isinstance(content_dict, dict):
-            metadata = self._translate_dict(content_dict)
-            metadata["@context"] = CODEMETA_CONTEXT_URL
-            return metadata
-
-        return None
-
     def normalize_authors(self, d: List[dict]) -> Dict[str, list]:
         result = []
         for author in d:
diff --git a/swh/indexer/metadata_dictionary/pub.py b/swh/indexer/metadata_dictionary/pub.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/pub.py
@@ -0,0 +1,45 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+
+from swh.indexer.codemeta import _DATA_DIR, _read_crosstable
+
+from .base import YamlMapping
+
+PUB_TABLE_PATH = os.path.join(_DATA_DIR, "pub.csv")
+
+with open(PUB_TABLE_PATH) as fd:
+    (CODEMETA_TERMS, PUB_TABLE) = _read_crosstable(fd)
+
+
+class PubMapping(YamlMapping):
+    """Dedicated class for Pub (pubspec.yaml) mapping and translation"""
+
+    name = "pub"
+    filename = b"pubspec.yaml"
+    mapping = PUB_TABLE["Pub"]
+    # NOTE(review): pub.csv also maps "version", "author" and "issue_tracker",
+    # which have no handler here — confirm whether they should be translated.
+    string_fields = [
+        "repository",
+        "keywords",
+        "license",
+        "description",
+        "name",
+        "homepage",
+    ]
+
+    def normalize_license(self, s):
+        """Return the license as an SPDX URI node, or None if not a string."""
+        if isinstance(s, str):
+            return {"@id": "https://spdx.org/licenses/" + s}
+        return None
+
+    def normalize_homepage(self, s):
+        """Return the homepage as an ``@id`` node, or None if not a string."""
+        if isinstance(s, str):
+            return {"@id": s}
+        return None
diff --git a/swh/indexer/tests/metadata_dictionary/test_pub.py b/swh/indexer/tests/metadata_dictionary/test_pub.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_pub.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2017-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_pub():
+    raw_content = """
+---
+name: newtify
+description: >-
+  Have you been turned into a newt? Would you like to be?
+  This package can help. It has all of the
+  newt-transmogrification functionality you have been looking
+  for.
+keywords:
+  - polyfill
+  - shim
+  - compatibility
+  - portable
+  - mbstring
+version: 1.2.3
+license: MIT
+homepage: https://example-pet-store.com/newtify
+documentation: https://example-pet-store.com/newtify/docs
+
+environment:
+  sdk: '>=2.10.0 <3.0.0'
+
+dependencies:
+  efts: ^2.0.4
+  transmogrify: ^0.4.0
+
+dev_dependencies:
+  test: '>=1.15.0 <2.0.0'
+    """.encode(
+        "utf-8"
+    )
+
+    result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "name": "newtify",
+        "keywords": [
+            "polyfill",
+            "shim",
+            "compatibility",
+            "portable",
+            "mbstring",
+        ],
+        "description": """Have you been turned into a newt? Would you like to be? \
+This package can help. It has all of the \
+newt-transmogrification functionality you have been looking \
+for.""",
+        "url": "https://example-pet-store.com/newtify",
+        "license": "https://spdx.org/licenses/MIT",
+    }
+
+    assert result == expected
+
+
+def test_pub_invalid_yaml():
+    # An unterminated flow sequence is rejected by the YAML parser
+    raw_content = b"name: [newtify"
+
+    assert MAPPINGS["PubMapping"]().translate(raw_content) is None
+
+
+def test_pub_invalid_encoding():
+    # The byte 0xff never appears in UTF-8 encoded data
+    raw_content = b"name: \xff"
+
+    assert MAPPINGS["PubMapping"]().translate(raw_content) is None
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -101,6 +101,7 @@
             "maven",
             "npm",
             "pkg-info",
+            "pub",
             "",
         ]  # must be sorted for test to pass
     )