diff --git a/swh/indexer/data/pub.csv b/swh/indexer/data/pub.csv
new file mode 100644
--- /dev/null
+++ b/swh/indexer/data/pub.csv
@@ -0,0 +1,68 @@
+Property,Pub
+codeRepository,repository
+programmingLanguage,
+runtimePlatform,
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,
+softwareHelp,
+softwareRequirements,
+softwareVersion,version
+storageRequirements,
+supportingData,
+author,author
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,
+license,
+producer,
+provider,
+publisher,
+sponsor,
+version,version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,description
+identifier,
+name,name
+sameAs,
+url,homepage
+relatedLink,
+givenName,
+familyName,
+email,
+affiliation,
+identifier,
+name,
+address,
+type,
+id,
+softwareSuggestions,
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,issue_tracker
+referencePublication,
+readme,
diff --git a/swh/indexer/metadata_dictionary/pub.py b/swh/indexer/metadata_dictionary/pub.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/pub.py
@@ -0,0 +1,82 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Map ``pubspec.yaml`` files (Pub package manifests) to CodeMeta."""
+
+import os.path
+
+import yaml
+
+from swh.indexer.codemeta import _DATA_DIR, CODEMETA_CONTEXT_URL, _read_crosstable
+
+from .base import DictMapping, SingleFileMapping
+
+PUB_TABLE_PATH = os.path.join(_DATA_DIR, "pub.csv")
+
+# Pin the encoding so that reading the crosswalk table does not depend on the
+# locale of the importing process.
+with open(PUB_TABLE_PATH, encoding="utf-8") as fd:
+    (CODEMETA_TERMS, PUB_TABLE) = _read_crosstable(fd)
+
+
+class SafeLoader(yaml.SafeLoader):
+    """``yaml.SafeLoader`` without the implicit timestamp resolver.
+
+    Every implicit resolver tagged ``tag:yaml.org,2002:timestamp`` is filtered
+    out, so timestamp-like scalars are not converted to date objects.
+    """
+
+    yaml_implicit_resolvers = {
+        k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
+        for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
+    }
+
+
+class PubMapping(DictMapping, SingleFileMapping):
+    """Translate the content of a ``pubspec.yaml`` file to CodeMeta."""
+
+    name = "pub"
+    filename = b"pubspec.yaml"
+    mapping = PUB_TABLE["Pub"]
+    # NOTE(review): "keywords" and "license" have no entry in the "Pub" column
+    # of pub.csv, while the mapped keys "name", "version", "author", "homepage"
+    # and "issue_tracker" are not listed here -- confirm against DictMapping
+    # that this is the intended set.
+    string_fields = ["repository", "keywords", "license", "description"]
+
+    def translate(self, raw_content):
+        """Translate a raw ``pubspec.yaml`` document.
+
+        Args:
+            raw_content (bytes): content of the file; decoded with the
+                default codec of ``bytes.decode`` (UTF-8).
+
+        Returns:
+            The dict built by ``_translate_dict`` with ``@context`` set to
+            ``CODEMETA_CONTEXT_URL``, or ``None`` when the content cannot be
+            decoded, is not valid YAML, or its top-level node is not a mapping.
+        """
+        try:
+            raw_content_string: str = raw_content.decode()
+        except UnicodeDecodeError:
+            # Undecodable bytes cannot be a usable manifest: skip the file.
+            return None
+
+        try:
+            # SafeLoader derives from yaml.SafeLoader, so no arbitrary Python
+            # object can be constructed from the (untrusted) document.
+            content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
+        except yaml.YAMLError:
+            # yaml.YAMLError is the base class of all PyYAML errors; catching
+            # only ScannerError would let parser/composer/constructor errors
+            # escape on malformed input.
+            return None
+
+        if not isinstance(content_dict, dict):
+            return None
+
+        metadata = self._translate_dict(content_dict)
+        metadata["@context"] = CODEMETA_CONTEXT_URL
+        return metadata