diff --git a/swh/indexer/data/podspec.csv b/swh/indexer/data/podspec.csv new file mode 100644 --- /dev/null +++ b/swh/indexer/data/podspec.csv @@ -0,0 +1,68 @@ +Property,Podspec +codeRepository,spec.source +programmingLanguage, +runtimePlatform, +targetProduct, +applicationCategory, +applicationSubCategory, +downloadUrl, +fileSize, +installUrl, +memoryRequirements, +operatingSystem, +permissions, +processorRequirements, +releaseNotes, +softwareHelp, +softwareRequirements, +softwareVersion, +storageRequirements, +supportingData, +author,spec.authors +citation, +contributor, +copyrightHolder, +copyrightYear, +dateCreated, +dateModified, +datePublished, +editor, +encoding, +fileFormat, +funder, +keywords, +license,spec.license +producer, +provider, +publisher, +sponsor, +version,spec.version +isAccessibleForFree, +isPartOf, +hasPart, +position, +description,spec.summary +identifier, +name,spec.name +sameAs, +url,spec.homepage +relatedLink, +givenName, +familyName, +email, +affiliation, +identifier, +name, +address, +type, +id, +softwareSuggestions, +maintainer, +contIntegration, +buildInstructions, +developmentStatus, +embargoDate, +funding, +issueTracker, +referencePublication, +readme, diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py --- a/swh/indexer/metadata_dictionary/__init__.py +++ b/swh/indexer/metadata_dictionary/__init__.py @@ -8,9 +8,24 @@ import click -from . import cff, codemeta, composer, dart, github, maven, npm, nuget, python, ruby +from . 
class PodspecMapping(DictMapping, SingleFileIntrinsicMapping):
    """Intrinsic metadata mapping for CocoaPods ``*.podspec`` files.

    A podspec is a Ruby script; no Ruby interpreter is used here.  Instead,
    every ``<receiver>.<attribute> = <expression>`` line that follows the
    ``Pod::Spec.new`` header is matched with a regular expression, and the
    right-hand side is evaluated only when it also happens to be a Python
    string literal or a list of string literals.
    """

    name = "podspec"
    mapping = PODSPEC_TABLE["Podspec"]
    string_fields = [
        "description",
        "name",
        "softwareVersion",
    ]

    # Header line opening the specification block, e.g.
    # ``Pod::Spec.new do |spec|`` or ``Pod::Spec.new { |s|``.
    _re_spec_new = re.compile(r".*Pod::Spec.new +(do|\{) +\|.*\|.*")
    # Attribute assignment, e.g. ``spec.name = 'Foo'``.  The named groups are
    # required: translate() reads them back as group("key") / group("expr").
    _re_spec_entry = re.compile(r"\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)")

    @classmethod
    def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]:
        """Return the sha1 of the first entry whose name ends in ``.podspec``.

        Args:
            file_entries: directory entries; each one is read through its
                ``"name"`` (bytes) and ``"sha1"`` keys.

        Returns:
            A one-element list holding the matching entry's sha1, or an empty
            list when no entry matches.
        """
        for entry in file_entries:
            if entry["name"].endswith(b".podspec"):
                return [entry["sha1"]]
        return []

    def translate(self, raw_content):
        """Translate the raw bytes of a podspec file.

        Args:
            raw_content: content of the ``.podspec`` file, as bytes.

        Returns:
            Whatever ``self._translate_dict`` returns for the extracted
            attributes, or ``None`` when the content cannot be decoded or
            contains no ``Pod::Spec.new`` header.
        """
        try:
            raw_content = raw_content.decode()
        except UnicodeDecodeError:
            self.log.warning("Error unidecoding from %s", self.log_suffix)
            return None

        # Skip everything that precedes the ``Pod::Spec.new`` header ...
        lines = itertools.dropwhile(
            lambda x: not self._re_spec_new.match(x), raw_content.split("\n")
        )

        # ... then consume the header line itself.
        try:
            next(lines)
        except StopIteration:
            self.log.warning("Could not find Pod::Specification in %s", self.log_suffix)
            return None

        content_dict = {}
        for line in lines:
            match = self._re_spec_entry.match(line)
            if match:
                # NOTE(review): Ruby hash literals such as
                # ``{ :git => '...' }`` are not valid Python expressions, so
                # they evaluate to None here and are dropped; the hash
                # handling in translate_source() is therefore not reached
                # through this method -- confirm whether that is intended.
                value = self.eval_podspec_expression(match.group("expr"))
                if value:
                    content_dict[match.group("key")] = value
        return self._translate_dict(content_dict)

    def eval_podspec_expression(self, expr):
        """Statically evaluate the right-hand side of a podspec assignment.

        The expression is parsed with Python's ``ast`` module and never
        executed.  Only string literals and (possibly nested) lists of
        non-empty string literals are supported.

        Args:
            expr: source text of the expression; ``.freeze`` suffixes are
                removed before parsing.

        Returns:
            A ``str``, a ``list``, or ``None`` when the expression is not
            valid Python syntax or uses any other construct.
        """

        def evaluator(node):
            # ast.Str is deprecated since Python 3.8 and removed in 3.14;
            # string literals are ast.Constant nodes holding a str value.
            if isinstance(node, ast.Constant) and isinstance(node.value, str):
                return node.value
            if isinstance(node, ast.List):
                res = []
                for element in node.elts:
                    val = evaluator(element)
                    if not val:
                        # One unsupported (or empty) element invalidates the
                        # whole list.
                        return None
                    res.append(val)
                return res
            return None

        expr = expr.replace(".freeze", "")
        try:
            tree = ast.parse(expr, mode="eval")
        except (SyntaxError, ValueError):
            return None
        if isinstance(tree, ast.Expression):
            return evaluator(tree.body)
        return None

    def translate_summary(self, graph: Graph, root, s):
        """Add ``s`` to ``graph`` as the ``schema:description`` of ``root``.

        Values that are not strings are ignored.
        """
        if isinstance(s, str):
            graph.add((root, SCHEMA.description, Literal(s)))

    def parse_enum(self, enum_string):
        """Parse a flat Ruby hash literal into a dict of strings.

        The parsing is purely textual: the body is split on ``,`` and each
        item on ``=>``.  Keys are kept verbatim (``":git"`` keeps its colon);
        values lose their first and last character, which are expected to be
        the surrounding quotes.

        Args:
            enum_string: text such as ``"{ :git => 'https://...' }"``.

        Returns:
            The parsed dict, or ``None`` when ``enum_string`` does not start
            with ``{``.
        """
        if not enum_string.startswith("{"):
            return None

        parsed = {}
        for item in enum_string.strip("{ }\n").split(","):
            parts = item.split("=>")
            if len(parts) < 2:
                # Empty hash, trailing comma, or a fragment without ``=>``:
                # nothing to record (indexing parts[1] would raise).
                continue
            parsed[parts[0].strip("\n ")] = parts[1].strip("\n ")[1:-1]
        return parsed

    def translate_source(self, graph: Graph, root, s):
        """Add the code repository described by ``s`` to ``graph``.

        Args:
            graph: graph the triple is added to.
            root: subject of the triple.
            s: either a plain URL, or the text of a Ruby hash literal whose
                ``:git`` entry holds the URL.  Values that are not strings
                are ignored.
        """
        if not isinstance(s, str):
            return

        parsed = self.parse_enum(s)
        if parsed is not None:
            # ``s`` is a hash literal: only its ``:git`` entry is used as the
            # repository URL.  Without one, emit nothing rather than using
            # the raw hash text as a URI.
            repository = parsed.get(":git")
            if not repository:
                return
            s = repository
        graph.add((root, CODEMETA.codeRepository, URIRef(s)))
def test_compute_metadata_podspec():
    """Translate a complete podspec and compare with the expected document."""
    podspec_source = b"""Pod::Spec.new do |spec|
  spec.name = 'Reachability'
  spec.version = '3.1.0'
  spec.license = { :type => 'BSD' }
  spec.homepage = 'https://github.com/tonymillion/Reachability'
  spec.authors = { 'Tony Million' => 'tonymillion@gmail.com' }
  spec.summary = 'ARC and GCD Compatible Reachability Class for iOS and OS X.'
  spec.source = { :git => 'https://github.com/tonymillion/Reachability.git' }
  spec.module_name = 'Rich'
  spec.swift_version = '4.0'

  spec.ios.deployment_target = '9.0'
  spec.osx.deployment_target = '10.10'

  spec.source_files = 'Reachability/common/*.swift'
  spec.ios.source_files = 'Reachability/ios/*.swift', 'Reachability/extensions/*.swift'
  spec.osx.source_files = 'Reachability/osx/*.swift'

  spec.framework = 'SystemConfiguration'
  spec.ios.framework = 'UIKit'
  spec.osx.framework = 'AppKit'

  spec.dependency 'SomeOtherPod'
end"""
    podspec_mapping = MAPPINGS["PodspecMapping"]()

    tony = {
        "type": "Person",
        "name": "Tony Million",
        "email": "tonymillion@gmail.com",
    }
    expected_document = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "type": "SoftwareSourceCode",
        "author": [tony],
        "description": "ARC and GCD Compatible Reachability Class for iOS and OS X.",
        "url": "https://github.com/tonymillion/Reachability",
        "codeRepository": "https://github.com/tonymillion/Reachability.git",
        "name": "Reachability",
        "softwareVersion": "3.1.0",
    }

    assert podspec_mapping.translate(podspec_source) == expected_document


def test_parse_enum():
    """Parse a multi-line Ruby hash literal into a dict of strings."""
    hash_literal = """{
        :git => 'https://github.com/tensorflow/tensorflow.git',
        :commit => 'd8ce9f9c301d021a69953134185ab728c1c248d3'
    }
    """
    podspec_mapping = MAPPINGS["PodspecMapping"]()

    parsed_hash = podspec_mapping.parse_enum(hash_literal)

    assert parsed_hash == {
        ":git": "https://github.com/tensorflow/tensorflow.git",
        ":commit": "d8ce9f9c301d021a69953134185ab728c1c248d3",
    }