Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F11023649
D956.id3159.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D956.id3159.diff
View Options
diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -6,6 +6,7 @@
import csv
import json
import os.path
+import re
import swh.indexer
from pyld import jsonld
@@ -34,6 +35,8 @@
SCHEMA_URI + 'creator',
}
+_codemeta_field_separator = re.compile(r'\s*[,/]\s*')
+
def make_absolute_uri(local_name):
definition = CODEMETA_CONTEXT['@context'][local_name]
@@ -76,7 +79,7 @@
for (col, value) in zip(header, line): # For each cell in the row
if col in data_sources:
# If that's not the parentType/property/type/description
- for local_name in value.split('/'):
+ for local_name in _codemeta_field_separator.split(value):
# For each of the data source's properties that maps
# to this canonical name
if local_name.strip():
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -6,8 +6,10 @@
import os
import re
import abc
+import ast
import json
import logging
+import itertools
import email.parser
import xml.parsers.expat
@@ -471,6 +473,116 @@
return [{'@id': license} for license in licenses]
+@register_mapping
+class GemspecMapping(DictMapping):
+ _re_spec_new = re.compile(r'.*Gem::Specification.new do \|.*\|.*')
+ _re_spec_entry = re.compile(r'\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)')
+
+ mapping = CROSSWALK_TABLE['Ruby Gem']
+
+ def detect_metadata_files(self, file_entries):
+ for entry in file_entries:
+ if entry['name'].endswith(b'.gemspec'):
+ return [entry['sha1']]
+ return []
+
+ def translate(self, raw_content):
+ try:
+ raw_content = raw_content.decode()
+ except UnicodeDecodeError:
+ self.log.warning('Error unidecoding %r', raw_content)
+ return
+
+ # Skip lines before 'Gem::Specification.new'
+ lines = itertools.dropwhile(
+ lambda x: not self._re_spec_new.match(x),
+ raw_content.split('\n'))
+
+ try:
+ next(lines) # Consume 'Gem::Specification.new'
+ except StopIteration:
+ self.log.warning('Could not find Gem::Specification in %r',
+ raw_content)
+ return
+
+ content_dict = {}
+ for line in lines:
+ match = self._re_spec_entry.match(line)
+ if match:
+ value = self.eval_ruby_expression(match.group('expr'))
+ if value:
+ content_dict[match.group('key')] = value
+ return self.translate_dict(content_dict)
+
+ def eval_ruby_expression(self, expr):
+ """Very simple evaluator of Ruby expressions.
+
+ >>> GemspecMapping().eval_ruby_expression('"Foo bar"')
+ 'Foo bar'
+ >>> GemspecMapping().eval_ruby_expression("'Foo bar'")
+ 'Foo bar'
+ >>> GemspecMapping().eval_ruby_expression("['Foo', 'bar']")
+ ['Foo', 'bar']
+ >>> GemspecMapping().eval_ruby_expression("'Foo bar'.freeze")
+ 'Foo bar'
+ >>> GemspecMapping().eval_ruby_expression( \
+ "['Foo'.freeze, 'bar'.freeze]")
+ ['Foo', 'bar']
+ """
+ def evaluator(node):
+ if isinstance(node, ast.Str):
+ return node.s
+ elif isinstance(node, ast.List):
+ res = []
+ for element in node.elts:
+ val = evaluator(element)
+ if not val:
+ return
+ res.append(val)
+ return res
+
+ expr = expr.replace('.freeze', '')
+ try:
+ # We're parsing Ruby expressions here, but Python's
+ # ast.parse works for very simple Ruby expressions
+ # (mainly strings delimited with " or ', and lists
+ # of such strings).
+ tree = ast.parse(expr, mode='eval')
+ except (SyntaxError, ValueError):
+ return
+ if isinstance(tree, ast.Expression):
+ return evaluator(tree.body)
+
+ def normalize_homepage(self, s):
+ return {"@id": s}
+
+ def normalize_license(self, s):
+ if isinstance(s, str):
+ return [{"@id": "https://spdx.org/licenses/" + s}]
+
+ def normalize_licenses(self, licenses):
+ if isinstance(licenses, list):
+ return [{"@id": "https://spdx.org/licenses/" + license}
+ for license in licenses
+ if isinstance(license, str)]
+
+ def translate_author(self, translated_metadata, v):
+ k = self.mapping['author']
+ translated_metadata.setdefault(k, {"@list": []})["@list"].append(v)
+
+ def translate_authors(self, translated_metadata, v):
+ k = self.mapping['authors']
+ translated_metadata.setdefault(k, {"@list": []})["@list"].extend(v)
+
+ def translate_summary(self, translated_metadata, v):
+ k = self.mapping['summary']
+ translated_metadata.setdefault(k, []).append(v)
+
+ def translate_description(self, translated_metadata, v):
+ k = self.mapping['description']
+ translated_metadata.setdefault(k, []).append(v)
+
+
def main():
raw_content = """{"name": "test_name", "unknown_term": "ut"}"""
raw_content1 = b"""{"name": "test_name",
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -727,6 +727,61 @@
'license': 'MIT',
})
+ def test_gemspec_base(self):
+ raw_content = b"""
+Gem::Specification.new do |s|
+ s.name = 'example'
+ s.version = '0.1.0'
+ s.licenses = ['MIT']
+ s.summary = "This is an example!"
+ s.description = "Much longer explanation of the example!"
+ s.authors = ["Ruby Coder"]
+ s.email = 'rubycoder@example.com'
+ s.files = ["lib/example.rb"]
+ s.homepage = 'https://rubygems.org/gems/example'
+ s.metadata = { "source_code_uri" => "https://github.com/example/example" }
+end"""
+ result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ self.assertCountEqual(result.pop('description'), [
+ "This is an example!",
+ "Much longer explanation of the example!"
+ ])
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ 'author': ['Ruby Coder'],
+ 'name': 'example',
+ 'license': 'https://spdx.org/licenses/MIT',
+ 'codeRepository': 'https://rubygems.org/gems/example',
+ 'email': 'rubycoder@example.com',
+ 'version': '0.1.0',
+ })
+
+ def test_gemspec_two_author_fields(self):
+ raw_content = b"""
+Gem::Specification.new do |s|
+ s.authors = ["Ruby Coder1"]
+ s.author = "Ruby Coder2"
+end"""
+ result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ self.assertCountEqual(result.pop('author'), [
+ 'Ruby Coder1', 'Ruby Coder2'])
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ })
+
+ def test_gemspec_invalid_author(self):
+ raw_content = b"""
+Gem::Specification.new do |s|
+ s.author = "Ruby Coder1",
+end"""
+ result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ })
+
def test_revision_metadata_indexer(self):
metadata_indexer = RevisionMetadataTestIndexer()
fill_obj_storage(metadata_indexer.objstorage)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Sep 17, 4:55 PM (11 h, 42 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225739
Attached To
D956: Add gemspec mapping.
Event Timeline
Log In to Comment