Page MenuHomeSoftware Heritage

D8079.id.diff
No OneTemporary

D8079.id.diff

diff --git a/swh/indexer/data/pubspec.csv b/swh/indexer/data/pubspec.csv
new file mode 100644
--- /dev/null
+++ b/swh/indexer/data/pubspec.csv
@@ -0,0 +1,68 @@
+Property,Pubspec
+codeRepository,repository
+programmingLanguage,
+runtimePlatform,platforms
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,
+softwareHelp,
+softwareRequirements,
+softwareVersion,version
+storageRequirements,
+supportingData,
+author,author/authors
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,keywords
+license,license
+producer,
+provider,
+publisher,
+sponsor,
+version,version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,description
+identifier,
+name,name
+sameAs,
+url,homepage
+relatedLink,
+givenName,
+familyName,
+email,author.email/authors.email
+affiliation,
+identifier,
+name,
+address,
+type,
+id,
+softwareSuggestions,
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,issue_tracker
+referencePublication,
+readme,
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -8,7 +8,7 @@
import click
-from . import cff, codemeta, composer, github, maven, npm, python, ruby
+from . import cff, codemeta, composer, dart, github, maven, npm, python, ruby
from .base import BaseExtrinsicMapping, BaseIntrinsicMapping, BaseMapping
INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
@@ -17,6 +17,7 @@
"GemspecMapping": ruby.GemspecMapping,
"MavenMapping": maven.MavenMapping,
"NpmMapping": npm.NpmMapping,
+ "PubMapping": dart.PubspecMapping,
"PythonPkginfoMapping": python.PythonPkginfoMapping,
"ComposerMapping": composer.ComposerMapping,
}
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -8,6 +8,7 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
from typing_extensions import TypedDict
+import yaml
from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values
from swh.indexer.storage.interface import Sha1
@@ -244,3 +245,26 @@
if isinstance(content_dict, dict):
return self._translate_dict(content_dict)
return None
+
+
+class SafeLoader(yaml.SafeLoader):
+ yaml_implicit_resolvers = {
+ k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
+ for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
+ }
+
+
+class YamlMapping(DictMapping, SingleFileIntrinsicMapping):
+ """Base class for all mappings that use Yaml data as input."""
+
+ def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
+ raw_content_string: str = raw_content.decode()
+ try:
+ content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
+ except yaml.scanner.ScannerError:
+ return None
+
+ if isinstance(content_dict, dict):
+ return self._translate_dict(content_dict)
+
+ return None
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -1,20 +1,11 @@
from typing import Dict, List, Optional, Union
-import yaml
-
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
-from .base import DictMapping, SingleFileIntrinsicMapping
-
-
-class SafeLoader(yaml.SafeLoader):
- yaml_implicit_resolvers = {
- k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
- for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
- }
+from .base import YamlMapping
-class CffMapping(DictMapping, SingleFileIntrinsicMapping):
+class CffMapping(YamlMapping):
"""Dedicated class for Citation (CITATION.cff) mapping and translation"""
name = "cff"
@@ -22,18 +13,6 @@
mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
string_fields = ["keywords", "license", "abstract", "version", "doi"]
- def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
- raw_content_string: str = raw_content.decode()
- try:
- content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
- except yaml.scanner.ScannerError:
- return None
-
- if isinstance(content_dict, dict):
- return self._translate_dict(content_dict)
-
- return None
-
def normalize_authors(self, d: List[dict]) -> Dict[str, list]:
result = []
for author in d:
diff --git a/swh/indexer/metadata_dictionary/dart.py b/swh/indexer/metadata_dictionary/dart.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/dart.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+import re
+
+from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+
+from .base import YamlMapping
+
+# Location of the crosswalk table for pubspec keys, under ``_DATA_DIR``.
+PUB_TABLE_PATH = os.path.join(_DATA_DIR, "pubspec.csv")
+
+# The table is read when this module is imported; ``PUB_TABLE`` is indexed
+# by column name further down (``PUB_TABLE["Pubspec"]``).
+with open(PUB_TABLE_PATH) as fd:
+ (CODEMETA_TERMS, PUB_TABLE) = _read_crosstable(fd)
+
+
+def name_to_person(name):
+ return {
+ "@type": SCHEMA_URI + "Person",
+ SCHEMA_URI + "name": name,
+ }
+
+
+class PubspecMapping(YamlMapping):
+
+ name = "pubspec"
+ filename = b"pubspec.yaml"
+ mapping = PUB_TABLE["Pubspec"]
+ string_fields = [
+ "repository",
+ "keywords",
+ "description",
+ "name",
+ "homepage",
+ "issue_tracker",
+ "platforms",
+ "license"
+ # license will only be used with the SPDX Identifier
+ ]
+
+ def normalize_license(self, s):
+ if isinstance(s, str):
+ return {"@id": "https://spdx.org/licenses/" + s}
+
+ def normalize_homepage(self, s):
+ if isinstance(s, str):
+ return {"@id": s}
+
+ def normalize_author(self, s):
+ name_email_regex = "(?P<name>.*?)( <(?P<email>.*)>)"
+ author = {"@type": SCHEMA_URI + "Person"}
+ if isinstance(s, str):
+ match = re.search(name_email_regex, s)
+ if match:
+ name = match.group("name")
+ email = match.group("email")
+ author[SCHEMA_URI + "email"] = email
+ else:
+ name = s
+
+ author[SCHEMA_URI + "name"] = name
+
+ return {"@list": [author]}
+
+ def normalize_authors(self, authors_list):
+ authors = {"@list": []}
+
+ if isinstance(authors_list, list):
+ for s in authors_list:
+ author = self.normalize_author(s)["@list"]
+ authors["@list"] += author
+ return authors
diff --git a/swh/indexer/tests/metadata_dictionary/test_dart.py b/swh/indexer/tests/metadata_dictionary/test_dart.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_dart.py
@@ -0,0 +1,157 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_pubspec():
+ """Translate a full pubspec document and compare with the expected output.
+
+ The expected document holds the name, keywords, description, the homepage
+ (as ``url``) and the license (as an SPDX URL); the fixture's version,
+ documentation, environment and dependency keys have no counterpart in it.
+ """
+ raw_content = """
+---
+name: newtify
+description: >-
+ Have you been turned into a newt? Would you like to be?
+ This package can help. It has all of the
+ newt-transmogrification functionality you have been looking
+ for.
+keywords:
+ - polyfill
+ - shim
+ - compatibility
+ - portable
+ - mbstring
+version: 1.2.3
+license: MIT
+homepage: https://example-pet-store.com/newtify
+documentation: https://example-pet-store.com/newtify/docs
+
+environment:
+ sdk: '>=2.10.0 <3.0.0'
+
+dependencies:
+ efts: ^2.0.4
+ transmogrify: ^0.4.0
+
+dev_dependencies:
+ test: '>=1.15.0 <2.0.0'
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "name": "newtify",
+ "keywords": [
+ "polyfill",
+ "shim",
+ "compatibility",
+ "portable",
+ "mbstring",
+ ],
+ "description": """Have you been turned into a newt? Would you like to be? \
+This package can help. It has all of the \
+newt-transmogrification functionality you have been looking \
+for.""",
+ "url": "https://example-pet-store.com/newtify",
+ "license": "https://spdx.org/licenses/MIT",
+ }
+
+ assert result == expected
+
+
+def test_normalize_author_pubspec():
+ """A single ``author`` of the form ``Name <email>`` gives one Person
+ with separate name and email."""
+ raw_content = """
+ author: Atlee Pine <atlee@example.org>
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [
+ {"type": "Person", "name": "Atlee Pine", "email": "atlee@example.org"},
+ ],
+ }
+
+ assert result == expected
+
+
+def test_normalize_authors_pubspec():
+ """An ``authors`` list gives one Person per entry; the entry without an
+ ``<email>`` part only has a name."""
+ raw_content = """
+ authors:
+ - Vicky Merzown <vmz@example.org>
+ - Ron Bilius Weasley
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [
+ {"type": "Person", "name": "Vicky Merzown", "email": "vmz@example.org"},
+ {
+ "type": "Person",
+ "name": "Ron Bilius Weasley",
+ },
+ ],
+ }
+
+ assert result == expected
+
+
+def test_normalize_author_authors_pubspec():
+ """With both ``authors`` and ``author`` present, the expected author list
+ holds the ``authors`` entries followed by the ``author`` entry."""
+ raw_content = """
+ authors:
+ - Vicky Merzown <vmz@example.org>
+ - Ron Bilius Weasley
+ author: Hermione Granger
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [
+ {"type": "Person", "name": "Vicky Merzown", "email": "vmz@example.org"},
+ {
+ "type": "Person",
+ "name": "Ron Bilius Weasley",
+ },
+ {
+ "type": "Person",
+ "name": "Hermione Granger",
+ },
+ ],
+ }
+
+ assert result == expected
+
+
+def test_normalize_empty_authors():
+ """An ``authors`` key without a value gives a document with no author
+ entry."""
+ raw_content = """
+ authors:
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ }
+
+ assert result == expected
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -101,6 +101,7 @@
"maven",
"npm",
"pkg-info",
+ "pubspec",
"",
] # must be sorted for test to pass
)

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 7:39 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225348

Event Timeline