Page Menu | Home | Software Heritage

D956.id3067.diff
No One | Temporary

D956.id3067.diff

diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -6,6 +6,7 @@
import csv
import json
import os.path
+import re
import swh.indexer
from pyld import jsonld
@@ -34,6 +35,8 @@
SCHEMA_URI + 'creator',
}
+# Separator between the property names listed in a single crosswalk cell:
+# a ',' or a '/', together with any whitespace surrounding it.
+_codemeta_field_separator = re.compile(r'\s*[,/]\s*')
+
def make_absolute_uri(local_name):
definition = CODEMETA_CONTEXT['@context'][local_name]
@@ -76,7 +79,7 @@
for (col, value) in zip(header, line): # For each cell in the row
if col in data_sources:
# If that's not the parentType/property/type/description
- for local_name in value.split('/'):
+ for local_name in _codemeta_field_separator.split(value):
# For each of the data source's properties that maps
# to this canonical name
if local_name.strip():
diff --git a/swh/indexer/data/codemeta/crosswalk.csv b/swh/indexer/data/codemeta/crosswalk.csv
--- a/swh/indexer/data/codemeta/crosswalk.csv
+++ b/swh/indexer/data/codemeta/crosswalk.csv
@@ -18,7 +18,7 @@
schema:SoftwareApplication,softwareVersion,Text,Version of the software instance.,,,,,,,,,,,,,,,,,,,,,release,software version,
schema:SoftwareApplication,storageRequirements,Text or URL,Storage requirements (free space required).,,,,,,,,,,,,,,,,,,,,,,,
schema:SoftwareApplication,supportingData,DataFeed,Supporting data for a SoftwareApplication.,,,,,,,,,,,,,,,,,,,,,,,
-schema:CreativeWork,author,Organization or Person,The author of this content or rating. Please note that author is special in that HTML 5 provides a special mechanism for indicating authorship via the rel tag. That is equivalent to this and may be used interchangeably.,agents,creators,,creators,login,,,,,[aut] in Author,,author,Author,,,author,,,author,,developer,,authors
+schema:CreativeWork,author,Organization or Person,The author of this content or rating. Please note that author is special in that HTML 5 provides a special mechanism for indicating authorship via the rel tag. That is equivalent to this and may be used interchangeably.,agents,creators,,creators,login,,,,,[aut] in Author,,author,Author,,,author,,,author/authors,,developer,,authors
schema:CreativeWork,citation,CreativeWork or URL,"A citation or reference to another creative work, such as another publication, web page, scholarly article, etc.",relatedLink,,,,,,,,,,,,,,,,,,,,,,
schema:CreativeWork,contributor,Organization or Person,A secondary contributor to the CreativeWork or Event.,,,,,,,,,,[ctb] in Author,,,,,,contributor,,,,,developer,,
schema:CreativeWork,copyrightHolder,Organization or Person,The party holding the legal copyright to the CreativeWork.,agents [role=copyrightHolder],,,,,,,,,,,,,,,,,,,,,,
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -6,8 +6,10 @@
import os
import re
import abc
+import ast
import json
import logging
+import itertools
import email.parser
import xmltodict
@@ -405,6 +407,116 @@
return [{'@id': license} for license in licenses]
+@register_mapping
+class GemspecMapping(DictMapping):
+    """Mapping for Ruby ``.gemspec`` files, based on the 'Ruby Gem'
+    column of the crosswalk table.
+
+    The gemspec is not executed: only simple ``<var>.<key> = <expr>``
+    lines following ``Gem::Specification.new do |<var>|`` are read, and
+    only expressions that are plain strings or lists of strings are kept.
+
+    NOTE(review): the ``normalize_<key>`` / ``translate_<key>`` methods
+    below are presumably dispatched by ``DictMapping.translate_dict``
+    according to the gemspec key -- confirm against the base class.
+    """
+    # Line opening the specification block,
+    # e.g. ``Gem::Specification.new do |s|``.
+    _re_spec_new = re.compile(r'.*Gem::Specification.new do \|.*\|.*')
+    # Assignment line inside the block, e.g. ``s.name = 'example'``.
+    _re_spec_entry = re.compile(r'\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)')
+
+    mapping = CROSSWALK_TABLE['Ruby Gem']
+
+    def detect_metadata_files(self, file_entries):
+        """Return a one-element list with the ``sha1`` of the first entry
+        whose name ends with ``.gemspec``, or an empty list if none does.
+        """
+        for entry in file_entries:
+            if entry['name'].endswith(b'.gemspec'):
+                return [entry['sha1']]
+        return []
+
+    def translate(self, raw_content):
+        """Translate the raw bytes of a gemspec file.
+
+        Returns the result of ``translate_dict`` applied to the
+        ``key -> value`` pairs that could be evaluated, or None (after
+        logging a warning) if the content cannot be decoded or contains
+        no ``Gem::Specification.new`` line.
+        """
+        try:
+            raw_content = raw_content.decode()
+        except UnicodeDecodeError:
+            self.log.warning('Error unidecoding %r', raw_content)
+            return
+
+        # Skip lines before 'Gem::Specification.new'
+        lines = itertools.dropwhile(
+            lambda x: not self._re_spec_new.match(x),
+            raw_content.split('\n'))
+
+        try:
+            next(lines)  # Consume 'Gem::Specification.new'
+        except StopIteration:
+            self.log.warning('Could not find Gem::Specification in %r',
+                             raw_content)
+            return
+
+        content_dict = {}
+        for line in lines:
+            match = self._re_spec_entry.match(line)
+            if match:
+                value = self.eval_ruby_expression(match.group('expr'))
+                # Unparsable or empty values are dropped.
+                if value:
+                    content_dict[match.group('key')] = value
+        return self.translate_dict(content_dict)
+
+    def eval_ruby_expression(self, expr):
+        """Very simple evaluator of Ruby expressions.
+
+        Returns a str, a list of such values, or None when the expression
+        is anything else (or a list containing an empty/unsupported item).
+
+        >>> GemspecMapping().eval_ruby_expression('"Foo bar"')
+        'Foo bar'
+        >>> GemspecMapping().eval_ruby_expression("'Foo bar'")
+        'Foo bar'
+        >>> GemspecMapping().eval_ruby_expression("['Foo', 'bar']")
+        ['Foo', 'bar']
+        >>> GemspecMapping().eval_ruby_expression("'Foo bar'.freeze")
+        'Foo bar'
+        >>> GemspecMapping().eval_ruby_expression( \
+                "['Foo'.freeze, 'bar'.freeze]")
+        ['Foo', 'bar']
+        """
+        def evaluator(node):
+            if isinstance(node, ast.Constant):
+                # Python >= 3.8 parses every literal as ast.Constant;
+                # only string literals are accepted.
+                if isinstance(node.value, str):
+                    return node.value
+                return
+            elif type(node).__name__ == 'Str':
+                # Python < 3.8 still produces ast.Str nodes. The class is
+                # matched by name because ast.Str is deprecated and is
+                # removed from recent Python versions.
+                return node.s
+            elif isinstance(node, ast.List):
+                res = []
+                for element in node.elts:
+                    val = evaluator(element)
+                    if not val:
+                        # One bad element invalidates the whole list.
+                        return
+                    res.append(val)
+                return res
+
+        # NOTE: this also strips '.freeze' occurring inside a string.
+        expr = expr.replace('.freeze', '')
+        try:
+            # We're parsing Ruby expressions here, but Python's
+            # ast.parse works for very simple Ruby expressions
+            # (mainly strings delimited with " or ', and lists
+            # of such strings).
+            tree = ast.parse(expr, mode='eval')
+        except (SyntaxError, ValueError):
+            return
+        if isinstance(tree, ast.Expression):
+            return evaluator(tree.body)
+
+    def normalize_homepage(self, s):
+        """Wrap the homepage value in an ``@id`` node."""
+        return {"@id": s}
+
+    def normalize_license(self, s):
+        """Return a one-element list holding the SPDX ``@id`` node for a
+        license name; None if the value is not a string."""
+        if isinstance(s, str):
+            return [{"@id": "https://spdx.org/licenses/" + s}]
+
+    def normalize_licenses(self, licenses):
+        """Return SPDX ``@id`` nodes for every string in a list of license
+        names; None if the value is not a list."""
+        if isinstance(licenses, list):
+            return [{"@id": "https://spdx.org/licenses/" + license}
+                    for license in licenses
+                    if isinstance(license, str)]
+
+    def translate_author(self, translated_metadata, v):
+        """Append a single author name to the ``@list`` stored under the
+        key mapped from 'author'. Non-string values are ignored, so that
+        a list is never nested inside the ``@list``.
+        """
+        if not isinstance(v, str):
+            return
+        k = self.mapping['author']
+        translated_metadata.setdefault(k, {"@list": []})["@list"].append(v)
+
+    def translate_authors(self, translated_metadata, v):
+        """Add author names to the ``@list`` stored under the key mapped
+        from 'authors'.
+
+        A bare string is treated as a one-element list: extending the
+        ``@list`` with a str would add one author per character.
+        Non-string items and other value types are ignored.
+        """
+        if isinstance(v, str):
+            v = [v]
+        if not isinstance(v, list):
+            return
+        names = [author for author in v if isinstance(author, str)]
+        if not names:
+            return
+        k = self.mapping['authors']
+        translated_metadata.setdefault(k, {"@list": []})["@list"].extend(
+            names)
+
+    def translate_summary(self, translated_metadata, v):
+        """Append the summary to the list stored under the key mapped
+        from 'summary'."""
+        k = self.mapping['summary']
+        translated_metadata.setdefault(k, []).append(v)
+
+    def translate_description(self, translated_metadata, v):
+        """Append the description to the list stored under the key mapped
+        from 'description'."""
+        k = self.mapping['description']
+        translated_metadata.setdefault(k, []).append(v)
+
+
def main():
raw_content = """{"name": "test_name", "unknown_term": "ut"}"""
raw_content1 = b"""{"name": "test_name",
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -696,6 +696,61 @@
'license': 'MIT',
})
+ def test_gemspec_base(self):
+ # Translate a typical gemspec and compare the whole result.
+ raw_content = b"""
+Gem::Specification.new do |s|
+ s.name = 'example'
+ s.version = '0.1.0'
+ s.licenses = ['MIT']
+ s.summary = "This is an example!"
+ s.description = "Much longer explanation of the example!"
+ s.authors = ["Ruby Coder"]
+ s.email = 'rubycoder@example.com'
+ s.files = ["lib/example.rb"]
+ s.homepage = 'https://rubygems.org/gems/example'
+ s.metadata = { "source_code_uri" => "https://github.com/example/example" }
+end"""
+ result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ # Summary and description are both expected under 'description';
+ # their relative order is not checked.
+ self.assertCountEqual(result.pop('description'), [
+ "This is an example!",
+ "Much longer explanation of the example!"
+ ])
+ # The homepage is expected under 'codeRepository'; s.files and
+ # s.metadata have no counterpart in the expected output.
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ 'author': ['Ruby Coder'],
+ 'name': 'example',
+ 'license': 'https://spdx.org/licenses/MIT',
+ 'codeRepository': 'https://rubygems.org/gems/example',
+ 'email': 'rubycoder@example.com',
+ 'version': '0.1.0',
+ })
+
+ def test_gemspec_two_author_fields(self):
+ # A gemspec setting both 'authors' and 'author': the names from the
+ # two fields are expected together under 'author' (order not checked).
+ raw_content = b"""
+Gem::Specification.new do |s|
+ s.authors = ["Ruby Coder1"]
+ s.author = "Ruby Coder2"
+end"""
+ result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ self.assertCountEqual(result.pop('author'), [
+ 'Ruby Coder1', 'Ruby Coder2'])
+ # Nothing but the context and type must remain.
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ })
+
+ def test_gemspec_invalid_author(self):
+ # The author value is followed by a trailing comma, so it is not a
+ # plain string literal; no 'author' is expected in the result.
+ raw_content = b"""
+Gem::Specification.new do |s|
+ s.author = "Ruby Coder1",
+end"""
+ result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ })
+
def test_revision_metadata_indexer(self):
metadata_indexer = RevisionMetadataTestIndexer()
fill_obj_storage(metadata_indexer.objstorage)

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 7:11 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225738

Event Timeline