diff --git a/swh/indexer/data/pubspec.csv b/swh/indexer/data/pubspec.csv new file mode 100644 --- /dev/null +++ b/swh/indexer/data/pubspec.csv @@ -0,0 +1,68 @@ +Property,Pubspec +codeRepository,repository +programmingLanguage, +runtimePlatform,platforms +targetProduct, +applicationCategory, +applicationSubCategory, +downloadUrl, +fileSize, +installUrl, +memoryRequirements, +operatingSystem, +permissions, +processorRequirements, +releaseNotes, +softwareHelp, +softwareRequirements, +softwareVersion,version +storageRequirements, +supportingData, +author,author/authors +citation, +contributor, +copyrightHolder, +copyrightYear, +dateCreated, +dateModified, +datePublished, +editor, +encoding, +fileFormat, +funder, +keywords,keywords +license,license +producer, +provider, +publisher, +sponsor, +version,version +isAccessibleForFree, +isPartOf, +hasPart, +position, +description,description +identifier, +name,name +sameAs, +url,homepage +relatedLink, +givenName, +familyName, +email,author.email/authors.email +affiliation, +identifier, +name, +address, +type, +id, +softwareSuggestions, +maintainer, +contIntegration, +buildInstructions, +developmentStatus, +embargoDate, +funding, +issueTracker,issue_tracker +referencePublication, +readme, diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py --- a/swh/indexer/metadata_dictionary/__init__.py +++ b/swh/indexer/metadata_dictionary/__init__.py @@ -7,7 +7,7 @@ import click -from . import cff, codemeta, composer, github, maven, npm, python, ruby +from . 
import cff, codemeta, composer, dart, github, maven, npm, python, ruby
 
 MAPPINGS = {
     "CffMapping": cff.CffMapping,
@@ -16,6 +16,7 @@
     "GitHubMapping": github.GitHubMapping,
     "MavenMapping": maven.MavenMapping,
     "NpmMapping": npm.NpmMapping,
+    "PubMapping": dart.PubspecMapping,
     "PythonPkginfoMapping": python.PythonPkginfoMapping,
     "ComposerMapping": composer.ComposerMapping,
 }
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -8,6 +8,7 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
 
 from typing_extensions import TypedDict
+import yaml
 
 from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values
 from swh.indexer.storage.interface import Sha1
@@ -227,3 +228,39 @@
         if isinstance(content_dict, dict):
             return self._translate_dict(content_dict)
         return None
+
+
+class SafeLoader(yaml.SafeLoader):
+    """``yaml.SafeLoader`` without the implicit timestamp resolver.
+
+    Scalars that look like dates are therefore not implicitly tagged as
+    timestamps when a document is loaded with this loader.
+    """
+
+    yaml_implicit_resolvers = {
+        k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
+        for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
+    }
+
+
+class YamlMapping(DictMapping, SingleFileMapping):
+    """Base class for all mappings that use Yaml data as input."""
+
+    def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
+        """Parse ``raw_content`` as YAML and translate the resulting mapping.
+
+        Returns ``None`` when the bytes are not valid UTF-8, the document is
+        not valid YAML, or its top-level value is not a mapping.
+        """
+        try:
+            content_dict = yaml.load(raw_content.decode(), Loader=SafeLoader)
+        except (UnicodeDecodeError, yaml.YAMLError):
+            # yaml.YAMLError is the base class of every PyYAML error, so
+            # parser/composer/constructor failures are handled like the
+            # scanner errors were, instead of propagating to the caller.
+            return None
+
+        if isinstance(content_dict, dict):
+            return self._translate_dict(content_dict)
+
+        return None
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -1,20 +1,11 @@
 from typing import Dict, List, Optional, Union
 
-import yaml
-
 from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
 
-from .base import DictMapping, SingleFileMapping
-
-
-class
SafeLoader(yaml.SafeLoader):
-    yaml_implicit_resolvers = {
-        k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
-        for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
-    }
+from .base import YamlMapping
 
 
-class CffMapping(DictMapping, SingleFileMapping):
+class CffMapping(YamlMapping):
     """Dedicated class for Citation (CITATION.cff) mapping and translation"""
 
     name = "cff"
@@ -22,18 +13,6 @@
     mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
     string_fields = ["keywords", "license", "abstract", "version", "doi"]
 
-    def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
-        raw_content_string: str = raw_content.decode()
-        try:
-            content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
-        except yaml.scanner.ScannerError:
-            return None
-
-        if isinstance(content_dict, dict):
-            return self._translate_dict(content_dict)
-
-        return None
-
     def normalize_authors(self, d: List[dict]) -> Dict[str, list]:
         result = []
         for author in d:
diff --git a/swh/indexer/metadata_dictionary/dart.py b/swh/indexer/metadata_dictionary/dart.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/dart.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+
+from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+
+from .base import YamlMapping
+
+PUB_TABLE_PATH = os.path.join(_DATA_DIR, "pubspec.csv")
+
+with open(PUB_TABLE_PATH) as fd:
+    (CODEMETA_TERMS, PUB_TABLE) = _read_crosstable(fd)
+
+
+def name_to_person(name):
+    """Return a Person node carrying only ``name``."""
+    return {
+        "@type": SCHEMA_URI + "Person",
+        SCHEMA_URI + "name": name,
+    }
+
+
+class PubspecMapping(YamlMapping):
+    """Mapping for ``pubspec.yaml`` files (``Pubspec`` crosswalk column)."""
+
+    name = "pubspec"
+    filename = b"pubspec.yaml"
+    mapping = PUB_TABLE["Pubspec"]
+    string_fields = [
+        "repository",
+        "keywords",
+        "description",
+        "name",
+        "homepage",
+        "issue_tracker",
+        "platforms",
+        "license",
+        # license will only be used with the SPDX Identifier
+    ]
+
+    def normalize_license(self, s):
+        """Build an SPDX URI from identifier ``s``; None for non-strings."""
+        if isinstance(s, str):
+            return {"@id": "https://spdx.org/licenses/" + s}
+
+    def normalize_homepage(self, s):
+        """Wrap URL ``s`` in an ``@id`` node; None for non-strings."""
+        if isinstance(s, str):
+            return {"@id": s}
+
+    def _parse_author(self, s):
+        """Build a Person node from ``Name <email>`` or from a bare name.
+
+        Returns None when ``s`` is not a string or contains no words.
+        """
+        if not isinstance(s, str):
+            return None
+        words = s.split()
+        if not words:
+            return None
+
+        person = {"@type": SCHEMA_URI + "Person"}
+        email = words[-1].strip("<>")  # to exclude '<' and '>'
+        if "@" in email:
+            # Only the last word is dropped from the name, and only when
+            # it looks like an address; a bare "First Last" stays whole.
+            person[SCHEMA_URI + "email"] = email
+            words = words[:-1]
+        if words:
+            person[SCHEMA_URI + "name"] = " ".join(words)
+        return person
+
+    def normalize_author(self, s):
+        """Translate the ``author`` field; None if it cannot be parsed."""
+        person = self._parse_author(s)
+        if person is not None:
+            return {"@list": [person]}
+
+    def normalize_authors(self, authors_list):
+        """Translate the ``authors`` field, skipping unparseable entries."""
+        authors = {"@list": []}
+
+        if isinstance(authors_list, list):
+            for s in authors_list:
+                person = self._parse_author(s)
+                if person is not None:
+                    authors["@list"].append(person)
+        return authors
diff --git a/swh/indexer/tests/metadata_dictionary/test_pubspec.py b/swh/indexer/tests/metadata_dictionary/test_pubspec.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_pubspec.py
@@ -0,0 +1,110 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_pubspec():
+    raw_content = """
+---
+name: newtify
+description: >-
+  Have you been turned into a newt? Would you like to be?
+  This package can help. It has all of the
+  newt-transmogrification functionality you have been looking
+  for.
+keywords:
+  - polyfill
+  - shim
+  - compatibility
+  - portable
+  - mbstring
+version: 1.2.3
+license: MIT
+homepage: https://example-pet-store.com/newtify
+documentation: https://example-pet-store.com/newtify/docs
+
+environment:
+  sdk: '>=2.10.0 <3.0.0'
+
+dependencies:
+  efts: ^2.0.4
+  transmogrify: ^0.4.0
+
+dev_dependencies:
+  test: '>=1.15.0 <2.0.0'
+    """.encode(
+        "utf-8"
+    )
+
+    result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "name": "newtify",
+        "keywords": [
+            "polyfill",
+            "shim",
+            "compatibility",
+            "portable",
+            "mbstring",
+        ],
+        "description": """Have you been turned into a newt? Would you like to be? \
+This package can help. It has all of the \
+newt-transmogrification functionality you have been looking \
+for.""",
+        "url": "https://example-pet-store.com/newtify",
+        "license": "https://spdx.org/licenses/MIT",
+    }
+
+    assert result == expected
+
+
+def test_normalize_author_pubspec():  # single "Name <email>" author string
+    raw_content = """
+    author: Atlee Pine <atlee@pine.com>
+    """.encode(
+        "utf-8"
+    )
+
+    result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "author": [
+            {"type": "Person", "name": "Atlee Pine", "email": "atlee@pine.com"},
+        ],
+    }
+
+    assert result == expected
+
+
+def test_normalize_authors_pubspec():  # list of "Name <email>" author strings
+    raw_content = """
+    authors:
+      - Vicky Merzown <vmz@yayy.com>
+      - Ron Bilius Weasley <ron.weasley@hp.com>
+    """.encode(
+        "utf-8"
+    )
+
+    result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+    expected = {
+        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+        "type": "SoftwareSourceCode",
+        "author": [
+            {"type": "Person", "name": "Vicky Merzown", "email": "vmz@yayy.com"},
+            {
+                "type": "Person",
+                "name": "Ron Bilius Weasley",
+                "email": "ron.weasley@hp.com",
+            },
+        ],
+    }
+
+    assert result == expected
diff --git a/swh/indexer/tests/test_cli.py
b/swh/indexer/tests/test_cli.py --- a/swh/indexer/tests/test_cli.py +++ b/swh/indexer/tests/test_cli.py @@ -101,6 +101,7 @@ "maven", "npm", "pkg-info", + "pubspec", "", ] # must be sorted for test to pass )