Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066396
D8079.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
D8079.id.diff
View Options
diff --git a/swh/indexer/data/pubspec.csv b/swh/indexer/data/pubspec.csv
new file mode 100644
--- /dev/null
+++ b/swh/indexer/data/pubspec.csv
@@ -0,0 +1,68 @@
+Property,Pubspec
+codeRepository,repository
+programmingLanguage,
+runtimePlatform,platforms
+targetProduct,
+applicationCategory,
+applicationSubCategory,
+downloadUrl,
+fileSize,
+installUrl,
+memoryRequirements,
+operatingSystem,
+permissions,
+processorRequirements,
+releaseNotes,
+softwareHelp,
+softwareRequirements,
+softwareVersion,version
+storageRequirements,
+supportingData,
+author,author/authors
+citation,
+contributor,
+copyrightHolder,
+copyrightYear,
+dateCreated,
+dateModified,
+datePublished,
+editor,
+encoding,
+fileFormat,
+funder,
+keywords,keywords
+license,license
+producer,
+provider,
+publisher,
+sponsor,
+version,version
+isAccessibleForFree,
+isPartOf,
+hasPart,
+position,
+description,description
+identifier,
+name,name
+sameAs,
+url,homepage
+relatedLink,
+givenName,
+familyName,
+email,author.email/authors.email
+affiliation,
+identifier,
+name,
+address,
+type,
+id,
+softwareSuggestions,
+maintainer,
+contIntegration,
+buildInstructions,
+developmentStatus,
+embargoDate,
+funding,
+issueTracker,issue_tracker
+referencePublication,
+readme,
diff --git a/swh/indexer/metadata_dictionary/__init__.py b/swh/indexer/metadata_dictionary/__init__.py
--- a/swh/indexer/metadata_dictionary/__init__.py
+++ b/swh/indexer/metadata_dictionary/__init__.py
@@ -8,7 +8,7 @@
import click
-from . import cff, codemeta, composer, github, maven, npm, python, ruby
+from . import cff, codemeta, composer, dart, github, maven, npm, python, ruby
from .base import BaseExtrinsicMapping, BaseIntrinsicMapping, BaseMapping
INTRINSIC_MAPPINGS: Dict[str, Type[BaseIntrinsicMapping]] = {
@@ -17,6 +17,7 @@
"GemspecMapping": ruby.GemspecMapping,
"MavenMapping": maven.MavenMapping,
"NpmMapping": npm.NpmMapping,
+ "PubMapping": dart.PubspecMapping,
"PythonPkginfoMapping": python.PythonPkginfoMapping,
"ComposerMapping": composer.ComposerMapping,
}
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -8,6 +8,7 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
from typing_extensions import TypedDict
+import yaml
from swh.indexer.codemeta import SCHEMA_URI, compact, merge_values
from swh.indexer.storage.interface import Sha1
@@ -244,3 +245,26 @@
if isinstance(content_dict, dict):
return self._translate_dict(content_dict)
return None
+
+
+class SafeLoader(yaml.SafeLoader):
+ """``yaml.SafeLoader`` subclass whose implicit-resolver table omits every
+ entry tagged ``tag:yaml.org,2002:timestamp``; all other entries inherited
+ from ``yaml.SafeLoader`` are kept as they are.
+ """
+ # NOTE(review): presumably this keeps date-like scalars as plain strings
+ # instead of datetime objects -- confirm against the PyYAML resolver docs.
+ yaml_implicit_resolvers = {
+ k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
+ for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
+ }
+
+
+class YamlMapping(DictMapping, SingleFileIntrinsicMapping):
+    """Base class for all mappings that use Yaml data as input."""
+
+    def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
+        """Parse ``raw_content`` as YAML and translate the resulting mapping.
+
+        Args:
+            raw_content: raw bytes of the metadata file.
+
+        Returns:
+            The value of ``self._translate_dict`` when the document's top-level
+            node is a mapping. ``None`` when the bytes cannot be decoded, when
+            they are not well-formed YAML, or when the top-level node is
+            anything other than a mapping.
+        """
+        try:
+            raw_content_string: str = raw_content.decode()
+        except UnicodeDecodeError:
+            # Undecodable bytes are handled like malformed YAML below.
+            return None
+
+        try:
+            # SafeLoader derives from yaml.SafeLoader, so only plain YAML
+            # types are constructed from the (untrusted) file content.
+            content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
+        except yaml.YAMLError:
+            # yaml.YAMLError is the common base of ScannerError, ParserError,
+            # ComposerError, ConstructorError and ReaderError; catching only
+            # ScannerError let the other malformed-document errors propagate.
+            return None
+
+        if isinstance(content_dict, dict):
+            return self._translate_dict(content_dict)
+
+        return None
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py
--- a/swh/indexer/metadata_dictionary/cff.py
+++ b/swh/indexer/metadata_dictionary/cff.py
@@ -1,20 +1,11 @@
from typing import Dict, List, Optional, Union
-import yaml
-
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
-from .base import DictMapping, SingleFileIntrinsicMapping
-
-
-class SafeLoader(yaml.SafeLoader):
- yaml_implicit_resolvers = {
- k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"]
- for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items()
- }
+from .base import YamlMapping
-class CffMapping(DictMapping, SingleFileIntrinsicMapping):
+class CffMapping(YamlMapping):
"""Dedicated class for Citation (CITATION.cff) mapping and translation"""
name = "cff"
@@ -22,18 +13,6 @@
mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"]
string_fields = ["keywords", "license", "abstract", "version", "doi"]
- def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]:
- raw_content_string: str = raw_content.decode()
- try:
- content_dict = yaml.load(raw_content_string, Loader=SafeLoader)
- except yaml.scanner.ScannerError:
- return None
-
- if isinstance(content_dict, dict):
- return self._translate_dict(content_dict)
-
- return None
-
def normalize_authors(self, d: List[dict]) -> Dict[str, list]:
result = []
for author in d:
diff --git a/swh/indexer/metadata_dictionary/dart.py b/swh/indexer/metadata_dictionary/dart.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary/dart.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+import re
+
+from swh.indexer.codemeta import _DATA_DIR, SCHEMA_URI, _read_crosstable
+
+from .base import YamlMapping
+
+PUB_TABLE_PATH = os.path.join(_DATA_DIR, "pubspec.csv")
+
+with open(PUB_TABLE_PATH) as fd:
+ (CODEMETA_TERMS, PUB_TABLE) = _read_crosstable(fd)
+
+
+def name_to_person(name):
+    """Wrap ``name`` in a node typed ``SCHEMA_URI + "Person"``.
+
+    The returned dict holds the ``@type`` entry followed by the
+    ``SCHEMA_URI + "name"`` entry carrying ``name`` unchanged.
+    """
+    person = {"@type": SCHEMA_URI + "Person"}
+    person[SCHEMA_URI + "name"] = name
+    return person
+
+
+class PubspecMapping(YamlMapping):
+    """Mapping for ``pubspec.yaml`` files, driven by the ``Pubspec`` column of
+    the ``pubspec.csv`` crosswalk table loaded into ``PUB_TABLE``.
+    """
+
+    name = "pubspec"
+    filename = b"pubspec.yaml"
+    mapping = PUB_TABLE["Pubspec"]
+    string_fields = [
+        "repository",
+        "keywords",
+        "description",
+        "name",
+        "homepage",
+        "issue_tracker",
+        "platforms",
+        # license will only be used with the SPDX Identifier
+        "license",
+    ]
+
+    def normalize_license(self, s):
+        """Return an ``@id`` node under ``https://spdx.org/licenses/`` for a
+        string ``s``; return ``None`` for any other type.
+        """
+        if isinstance(s, str):
+            return {"@id": "https://spdx.org/licenses/" + s}
+        return None
+
+    def normalize_homepage(self, s):
+        """Return ``{"@id": s}`` for a string ``s``; ``None`` otherwise."""
+        if isinstance(s, str):
+            return {"@id": s}
+        return None
+
+    def normalize_author(self, s):
+        """Turn one author string into a single-element ``@list`` of Person.
+
+        A value shaped like ``Name <email>`` is split into name and email;
+        any other string is used as the name as-is.
+
+        Returns:
+            ``{"@list": [person]}`` for a string, ``None`` for any other type.
+        """
+        if not isinstance(s, str):
+            # The value comes straight from a YAML file and may be a number,
+            # mapping, list or null; there is nothing to build a Person from.
+            return None
+
+        name_email_regex = r"(?P<name>.*?)( <(?P<email>.*)>)"
+        author = {"@type": SCHEMA_URI + "Person"}
+        match = re.search(name_email_regex, s)
+        if match:
+            name = match.group("name")
+            author[SCHEMA_URI + "email"] = match.group("email")
+        else:
+            name = s
+        author[SCHEMA_URI + "name"] = name
+
+        return {"@list": [author]}
+
+    def normalize_authors(self, authors_list):
+        """Normalize every entry of ``authors_list`` with ``normalize_author``.
+
+        Returns:
+            A ``@list`` node holding one Person per string entry, in input
+            order. Entries that are not strings are skipped, and the list is
+            empty when ``authors_list`` is not a list.
+        """
+        authors = {"@list": []}
+
+        if isinstance(authors_list, list):
+            for s in authors_list:
+                author = self.normalize_author(s)
+                # normalize_author yields None for non-string entries;
+                # subscripting that result used to raise.
+                if author is not None:
+                    authors["@list"] += author["@list"]
+        return authors
diff --git a/swh/indexer/tests/metadata_dictionary/test_dart.py b/swh/indexer/tests/metadata_dictionary/test_dart.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/metadata_dictionary/test_dart.py
@@ -0,0 +1,157 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+
+def test_compute_metadata_pubspec():
+ """Translate a complete pubspec document through the "PubMapping" entry.
+
+ The expected output carries name, keywords, description, url (from
+ ``homepage``) and license (as an SPDX URL); the fixture's version,
+ documentation, environment and dependency keys do not appear in it.
+ """
+ raw_content = """
+---
+name: newtify
+description: >-
+ Have you been turned into a newt? Would you like to be?
+ This package can help. It has all of the
+ newt-transmogrification functionality you have been looking
+ for.
+keywords:
+ - polyfill
+ - shim
+ - compatibility
+ - portable
+ - mbstring
+version: 1.2.3
+license: MIT
+homepage: https://example-pet-store.com/newtify
+documentation: https://example-pet-store.com/newtify/docs
+
+environment:
+ sdk: '>=2.10.0 <3.0.0'
+
+dependencies:
+ efts: ^2.0.4
+ transmogrify: ^0.4.0
+
+dev_dependencies:
+ test: '>=1.15.0 <2.0.0'
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "name": "newtify",
+ "keywords": [
+ "polyfill",
+ "shim",
+ "compatibility",
+ "portable",
+ "mbstring",
+ ],
+ "description": """Have you been turned into a newt? Would you like to be? \
+This package can help. It has all of the \
+newt-transmogrification functionality you have been looking \
+for.""",
+ "url": "https://example-pet-store.com/newtify",
+ "license": "https://spdx.org/licenses/MIT",
+ }
+
+ assert result == expected
+
+
+def test_normalize_author_pubspec():
+ """A single ``author`` value written as ``Name <email>`` is expected to
+ become one Person entry with separate name and email.
+ """
+ raw_content = """
+ author: Atlee Pine <atlee@example.org>
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [
+ {"type": "Person", "name": "Atlee Pine", "email": "atlee@example.org"},
+ ],
+ }
+
+ assert result == expected
+
+
+def test_normalize_authors_pubspec():
+ """An ``authors`` list is expected to yield one Person per entry, in order;
+ the entry without a ``<email>`` part yields a Person with a name only.
+ """
+ raw_content = """
+ authors:
+ - Vicky Merzown <vmz@example.org>
+ - Ron Bilius Weasley
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [
+ {"type": "Person", "name": "Vicky Merzown", "email": "vmz@example.org"},
+ {
+ "type": "Person",
+ "name": "Ron Bilius Weasley",
+ },
+ ],
+ }
+
+ assert result == expected
+
+
+def test_normalize_author_authors_pubspec():
+ """When both ``authors`` and ``author`` are present, the expected author
+ list holds the ``authors`` entries followed by the ``author`` entry.
+ """
+ raw_content = """
+ authors:
+ - Vicky Merzown <vmz@example.org>
+ - Ron Bilius Weasley
+ author: Hermione Granger
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ "author": [
+ {"type": "Person", "name": "Vicky Merzown", "email": "vmz@example.org"},
+ {
+ "type": "Person",
+ "name": "Ron Bilius Weasley",
+ },
+ {
+ "type": "Person",
+ "name": "Hermione Granger",
+ },
+ ],
+ }
+
+ assert result == expected
+
+
+def test_normalize_empty_authors():
+ """An ``authors`` key with no value is expected to produce output without
+ any ``author`` entry.
+ """
+ raw_content = """
+ authors:
+ """.encode(
+ "utf-8"
+ )
+
+ result = MAPPINGS["PubMapping"]().translate(raw_content)
+
+ expected = {
+ "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
+ "type": "SoftwareSourceCode",
+ }
+
+ assert result == expected
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -101,6 +101,7 @@
"maven",
"npm",
"pkg-info",
+ "pubspec",
"",
] # must be sorted for test to pass
)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 7:39 AM (8 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225348
Attached To
D8079: Metadata Indexer for Pub (pubspec.yaml)
Event Timeline
Log In to Comment