Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9340319
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
34 KB
Subscribers
None
View Options
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
index 7141c79..f4a6edc 100644
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -1,295 +1,352 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import abc
import json
import logging
import os
import re
import xml.parsers.expat

import xmltodict

from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
from swh.indexer.codemeta import compact, expand
# Registry of mapping instances, keyed by the mapping class name.
MAPPINGS = {}


def register_mapping(cls):
    """Class decorator recording one instance of ``cls`` in ``MAPPINGS``.

    The class is instantiated without arguments and the instance is stored
    under ``cls.__name__``; the class itself is returned unchanged.
    """
    instance = cls()
    MAPPINGS[cls.__name__] = instance
    return cls
class BaseMapping(metaclass=abc.ABCMeta):
    """Abstract root of all metadata mappings.

    A concrete mapping has to implement:

    - ``detect_metadata_files``
    - ``translate``
    """

    def __init__(self):
        # One logger per concrete mapping, named "<module>.<class>".
        logger_name = '%s.%s' % (
            self.__class__.__module__,
            self.__class__.__name__)
        self.log = logging.getLogger(logger_name)

    @abc.abstractmethod
    def detect_metadata_files(self, files):
        """Detect the files potentially containing metadata.

        Args:
            files (list): list of files

        Returns:
            list: list of sha1 (possibly empty)
        """

    @abc.abstractmethod
    def translate(self, file_content):
        """Translate the content of a metadata file (subclass hook)."""

    def normalize_translation(self, metadata):
        """Return ``metadata`` after passing it through ``compact``."""
        return compact(metadata)
class SingleFileMapping(BaseMapping):
    """Mapping whose metadata is read from a single, named file."""

    @property
    @abc.abstractmethod
    def filename(self):
        """Name of the file to extract metadata from."""

    def detect_metadata_files(self, file_entries):
        """Return the sha1 of the first entry named ``self.filename``.

        Args:
            file_entries: iterable of dicts with (at least) the keys
                ``'name'`` and ``'sha1'``.

        Returns:
            list: a one-element list holding the matching entry's sha1,
            or an empty list when no entry matches.
        """
        matching = (entry for entry in file_entries
                    if entry['name'] == self.filename)
        first = next(matching, None)
        if first is None:
            return []
        return [first['sha1']]
class DictMapping(BaseMapping):
    """Base class for mappings that take as input a file that is mostly
    a key-value store (eg. a shallow JSON dict)."""

    # Methods of this class hierarchy whose names merely look like per-key
    # hooks.  Keys come from the translated file, so a key named 'dict'
    # must not end up calling translate_dict() itself with hook arguments.
    _NON_HOOK_METHODS = frozenset({'translate_dict', 'normalize_translation'})

    @property
    @abc.abstractmethod
    def mapping(self):
        """A translation dict to map dict keys into a canonical name."""

    def _key_hook(self, prefix, key):
        """Return the per-key hook ``<prefix><key>`` of this mapping, if any.

        Returns None when no such attribute exists or when the name belongs
        to a regular method listed in ``_NON_HOOK_METHODS``.
        """
        name = prefix + key
        if name in self._NON_HOOK_METHODS:
            return None
        return getattr(self, name, None)

    def translate_dict(self, content_dict, *, normalize=True):
        """
        Translates content by parsing content from a dict object
        and translating with the appropriate mapping

        For every key ``k`` of ``content_dict``:

        - if the mapping defines ``translate_<k>``, it is called with the
          output dict and the value, and is responsible for filling the
          output itself;
        - otherwise, if ``k`` is in ``self.mapping``, the value is stored
          under the mapped name, after going through ``normalize_<k>``
          when the mapping defines it;
        - otherwise the key is ignored.

        Args:
            content_dict (dict): content dict to translate
            normalize (bool): whether to pass the result through
                ``normalize_translation`` before returning it

        Returns:
            dict: translated metadata in json-friendly form needed for
            the indexer
        """
        translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'}
        for k, v in content_dict.items():
            # First, check if there is a specific translation
            # method for this key
            translation_method = self._key_hook('translate_', k)
            if translation_method:
                translation_method(translated_metadata, v)
            elif k in self.mapping:
                # if there is no method, but the key is known from the
                # crosswalk table

                # if there is a normalization method, use it on the value
                normalization_method = self._key_hook('normalize_', k)
                if normalization_method:
                    v = normalization_method(v)

                # set the translation metadata with the normalized value
                translated_metadata[self.mapping[k]] = v
        if normalize:
            return self.normalize_translation(translated_metadata)
        return translated_metadata
class JsonMapping(DictMapping, SingleFileMapping):
    """Base class for all mappings that use a JSON file as input."""

    def translate(self, raw_content):
        """
        Translates content by parsing content from a bytestring containing
        json data and translating with the appropriate mapping

        Args:
            raw_content (bytes): raw content to translate

        Returns:
            dict: translated metadata in json-friendly form needed for
            the indexer, or None (after logging a warning) when the
            content cannot be decoded, is not valid JSON, or is valid JSON
            whose top-level value is not an object.
        """
        try:
            raw_content = raw_content.decode()
        except UnicodeDecodeError:
            self.log.warning('Error unidecoding %r', raw_content)
            return None
        try:
            content_dict = json.loads(raw_content)
        except json.JSONDecodeError:
            self.log.warning('Error unjsoning %r', raw_content)
            return None
        # translate_dict() iterates over .items(); a JSON list, string or
        # number at top level cannot carry key/value metadata.
        if not isinstance(content_dict, dict):
            self.log.warning('Unexpected JSON top-level type %s in %r',
                             type(content_dict).__name__, raw_content)
            return None
        return self.translate_dict(content_dict)
@register_mapping
class NpmMapping(JsonMapping):
    """
    dedicated class for NPM (package.json) mapping and translation

    The ``normalize_*`` methods are per-key hooks: each receives the raw
    value found in package.json and returns its translated form, or None
    when the value does not have a shape it knows how to translate.
    """
    mapping = CROSSWALK_TABLE['NodeJS']
    filename = b'package.json'

    # Prefixes accepted in the "<shortcut>:<path>" form of 'repository'.
    _schema_shortcuts = {
        'github': 'https://github.com/',
        'gist': 'https://gist.github.com/',
        'bitbucket': 'https://bitbucket.org/',
        'gitlab': 'https://gitlab.com/',
    }

    def normalize_repository(self, d):
        """https://docs.npmjs.com/files/package.json#repository

        Accepts either a ``{'type': ..., 'url': ...}`` dict, a full URL,
        a ``<shortcut>:<path>`` string, or a bare ``<path>`` (taken as a
        GitHub path).  Returns ``{'@id': url}`` or None.
        """
        if isinstance(d, dict):
            # Both keys are needed to build "<type>+<url>"; a partial
            # object is skipped instead of raising KeyError.
            if not (isinstance(d.get('type'), str)
                    and isinstance(d.get('url'), str)):
                return None
            url = '{type}+{url}'.format(**d)
        elif isinstance(d, str):
            if '://' in d:
                url = d
            elif ':' in d:
                (schema, rest) = d.split(':', 1)
                if schema in self._schema_shortcuts:
                    url = self._schema_shortcuts[schema] + rest
                else:
                    return None
            else:
                url = self._schema_shortcuts['github'] + d
        else:
            return None
        return {'@id': url}

    def normalize_bugs(self, d):
        """https://docs.npmjs.com/files/package.json#bugs

        'bugs' is either an object with a 'url' (and/or 'email') or a
        plain URL string.  Returns ``{'@id': url}`` or None when no URL
        is available.
        """
        if isinstance(d, dict) and isinstance(d.get('url'), str):
            return {'@id': d['url']}
        if isinstance(d, str):
            return {'@id': d}
        return None

    # "Name <email> (url)", where the email and url parts are optional.
    _parse_author = re.compile(r'^ *'
                               r'(?P<name>.*?)'
                               r'( +<(?P<email>.*)>)?'
                               r'( +\((?P<url>.*)\))?'
                               r' *$')

    def normalize_author(self, d):
        """https://docs.npmjs.com/files/package.json#people-fields-author-contributors

        Accepts the object form (``name`` / ``email`` / ``url`` keys) or
        the "Name <email> (url)" string form.  Returns a one-element
        ``@list`` holding a Person node, or None for unsupported input.
        """
        author = {'@type': SCHEMA_URI+'Person'}
        if isinstance(d, dict):
            name = d.get('name', None)
            email = d.get('email', None)
            url = d.get('url', None)
        elif isinstance(d, str):
            match = self._parse_author.match(d)
            if match is None:
                # e.g. a multi-line string: '.' does not match newlines
                return None
            name = match.group('name')
            email = match.group('email')
            url = match.group('url')
        else:
            return None
        if name:
            author[SCHEMA_URI+'name'] = name
        if email:
            author[SCHEMA_URI+'email'] = email
        if url:
            author[SCHEMA_URI+'url'] = {'@id': url}
        return {"@list": [author]}

    def normalize_license(self, s):
        """Map a license identifier string to its spdx.org URL.

        Non-string values cannot be appended to the URL prefix and yield
        None.
        """
        if isinstance(s, str):
            return {"@id": "https://spdx.org/licenses/" + s}
        return None

    def normalize_homepage(self, s):
        """Wrap the homepage URL in an ``@id`` node (None if not a str)."""
        if isinstance(s, str):
            return {"@id": s}
        return None
@register_mapping
class CodemetaMapping(SingleFileMapping):
    """
    dedicated class for CodeMeta (codemeta.json) mapping and translation
    """
    filename = b'codemeta.json'

    def translate(self, content):
        """Expand then re-compact the JSON document found in ``content``.

        Args:
            content (bytes): raw content of a codemeta.json file

        Returns:
            dict: the normalized metadata, or None (after logging a
            warning) when the content cannot be decoded or is not valid
            JSON, matching the error handling of JsonMapping.translate.
        """
        try:
            document = json.loads(content.decode())
        except UnicodeDecodeError:
            self.log.warning('Error unidecoding %r', content)
            return None
        except json.JSONDecodeError:
            self.log.warning('Error unjsoning %r', content)
            return None
        return self.normalize_translation(expand(document))
@register_mapping
class MavenMapping(DictMapping, SingleFileMapping):
    """
    dedicated class for Maven (pom.xml) mapping and translation
    """
    filename = b'pom.xml'
    mapping = CROSSWALK_TABLE['Java (Maven)']

    def translate(self, content):
        """Translate the raw content of a pom.xml file.

        Args:
            content (bytes): raw content of the pom.xml file

        Returns:
            dict: the normalized metadata, or None (after logging a
            warning) when the content is not well-formed XML or has no
            <project> element with children.
        """
        try:
            document = xmltodict.parse(content)
        except xml.parsers.expat.ExpatError:
            self.log.warning('Error parsing XML of %r', content)
            return None
        d = document.get('project')
        if not isinstance(d, dict):
            # wrong root element, or an empty <project/> (parsed as None)
            self.log.warning('No usable <project> element in %r', content)
            return None
        metadata = self.translate_dict(d, normalize=False)
        metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
        metadata[SCHEMA_URI+'license'] = self.parse_licenses(d)
        return self.normalize_translation(metadata)

    _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'}

    def parse_repositories(self, d):
        """https://maven.apache.org/pom.html#Repositories

        Returns the list of ``{'@id': url}`` nodes built by
        ``parse_repository`` for every declared repository, or for the
        default repository when none is declared.
        """
        declared = d.get('repositories')
        if not declared:
            # No <repositories> element, or an empty one (parsed as None).
            repositories = [self._default_repository]
        elif isinstance(declared, dict):
            repositories = declared.get('repository') or []
            if not isinstance(repositories, list):
                # xmltodict yields a bare dict for a single child element
                repositories = [repositories]
        else:
            repositories = []
        results = (self.parse_repository(d, repo) for repo in repositories)
        return [res for res in results if res]

    def parse_repository(self, d, repo):
        """Build the ``{'@id': url}`` node of the project in one repository.

        The URL is the repository URL followed by the dot-separated parts
        of ``groupId`` and by ``artifactId``, when the POM declares them
        (they may be missing, e.g. when inherited from a parent POM).
        Returns None for repositories that cannot be handled.
        """
        if not isinstance(repo, dict):
            return None
        if repo.get('layout', 'default') != 'default':
            return None  # TODO ?
        url = repo.get('url')
        if not isinstance(url, str):
            return None
        group_id = d.get('groupId')
        if group_id and isinstance(group_id, str):
            url = os.path.join(url, *group_id.split('.'))
        artifact_id = d.get('artifactId')
        if artifact_id and isinstance(artifact_id, str):
            url = os.path.join(url, artifact_id)
        return {"@id": url}

    def normalize_groupId(self, id_):
        """Wrap the group id in an ``@id`` node."""
        return {"@id": id_}

    def parse_licenses(self, d):
        """https://maven.apache.org/pom.html#Licenses

        The origin XML has the form:

            <licenses>
              <license>
                <name>Apache License, Version 2.0</name>
                <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
              </license>
            </licenses>

        Which was translated to a dict by xmltodict and is given as `d`:

        >>> d = {
        ...     # ...
        ...     "licenses": {
        ...         "license": {
        ...             "name": "Apache License, Version 2.0",
        ...             "url":
        ...             "https://www.apache.org/licenses/LICENSE-2.0.txt"
        ...         }
        ...     }
        ... }
        >>> MavenMapping().parse_licenses(d)
        [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'}]

        or, if there are more than one license:

        >>> from pprint import pprint
        >>> d = {
        ...     # ...
        ...     "licenses": {
        ...         "license": [
        ...             {
        ...                 "name": "Apache License, Version 2.0",
        ...                 "url":
        ...                 "https://www.apache.org/licenses/LICENSE-2.0.txt"
        ...             },
        ...             {
        ...                 "name": "MIT License, ",
        ...                 "url": "https://opensource.org/licenses/MIT"
        ...             }
        ...         ]
        ...     }
        ... }
        >>> pprint(MavenMapping().parse_licenses(d))
        [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'},
         {'@id': 'https://opensource.org/licenses/MIT'}]

        Entries without a ``url`` are skipped; a missing or empty
        ``licenses`` element yields an empty list.
        """
        licenses = d.get('licenses')
        if not isinstance(licenses, dict):
            # absent, or an empty <licenses/> element (parsed as None)
            return []
        licenses = licenses.get('license') or []
        if isinstance(licenses, dict):
            licenses = [licenses]
        elif not isinstance(licenses, list):
            return []
        return [{"@id": entry['url']}
                for entry in licenses
                if isinstance(entry, dict)
                and isinstance(entry.get('url'), str)]
def main():
    """Smoke test: translate one sample per mapping and print the result.

    ``translate`` expects bytes, and each mapping must be fed its own
    file format (JSON for NpmMapping, XML for MavenMapping).
    """
    npm_content = b"""{"name": "test_name", "unknown_term": "ut"}"""
    maven_content = b"""<project>
      <name>test_name</name>
      <groupId>com.example</groupId>
      <artifactId>test-artifact</artifactId>
    </project>"""
    result = MAPPINGS["NpmMapping"].translate(npm_content)
    result1 = MAPPINGS["MavenMapping"].translate(maven_content)
    print(result)
    print(result1)


if __name__ == "__main__":
    main()
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index f6f8878..bcc23aa 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,499 +1,585 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
from swh.model.hashutil import hash_to_bytes
from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
from swh.indexer.metadata_detector import (
detect_metadata, extract_minimal_metadata_dict
)
from swh.indexer.metadata import (
ContentMetadataIndexer, RevisionMetadataIndexer
)
from .test_utils import (
BASE_TEST_CONFIG, fill_obj_storage, fill_storage
)
# Tool descriptor shared by the indexer tests of this module.
TRANSLATOR_TOOL = dict(
    name='swh-metadata-translator',
    version='0.0.2',
    configuration=dict(
        type='local',
        context='NpmMapping',
    ),
)
class ContentMetadataTestIndexer(ContentMetadataIndexer):
    """Content metadata indexer whose configuration is enough to satisfy
    the indexing tests.
    """
    def parse_config_file(self, *args, **kwargs):
        """Always fail: this indexer must never load its own config.

        Raises:
            AssertionError: unconditionally.  An explicit ``raise`` is
            used rather than ``assert False`` so the check also fires
            when Python runs with ``-O``.
        """
        raise AssertionError(
            'should not be called; the rev indexer configures it.')
class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
    """Revision metadata indexer configured just enough for the tests."""

    # Content indexer class exposed on this test indexer.
    ContentMetadataIndexer = ContentMetadataTestIndexer

    def parse_config_file(self, *args, **kwargs):
        """Return a copy of the base test config with the tool plugged in."""
        config = dict(BASE_TEST_CONFIG)
        config['tools'] = TRANSLATOR_TOOL
        return config
class Metadata(unittest.TestCase):
"""
Tests metadata_mock_tool tool for Metadata detection
"""
def setUp(self):
    """Lift the diff size limit so failures show the entire diff."""
    self.maxDiff = None
def test_crosstable(self):
self.assertEqual(CROSSWALK_TABLE['NodeJS'], {
'repository': 'http://schema.org/codeRepository',
'os': 'http://schema.org/operatingSystem',
'cpu': 'http://schema.org/processorRequirements',
'engines':
'http://schema.org/processorRequirements',
'author': 'http://schema.org/author',
'author.email': 'http://schema.org/email',
'author.name': 'http://schema.org/name',
'contributor': 'http://schema.org/contributor',
'keywords': 'http://schema.org/keywords',
'license': 'http://schema.org/license',
'version': 'http://schema.org/version',
'description': 'http://schema.org/description',
'name': 'http://schema.org/name',
'bugs': 'https://codemeta.github.io/terms/issueTracker',
'homepage': 'http://schema.org/url'
})
def test_compute_metadata_none(self):
    """Translating empty content yields None.

    None is what the mapping returns when no metadata was found or an
    error occurred.
    """
    # given
    empty_content = b""

    # when
    translated = MAPPINGS["NpmMapping"].translate(empty_content)

    # then
    self.assertEqual(None, translated)
def test_compute_metadata_npm(self):
"""
testing only computation of metadata with hard_mapping_npm
"""
# given
content = b"""
{
"name": "test_metadata",
"version": "0.0.2",
"description": "Simple package.json test for indexer",
"repository": {
"type": "git",
"url": "https://github.com/moranegg/metadata_test"
},
"author": {
"email": "moranegg@example.com",
"name": "Morane G"
}
}
"""
declared_metadata = {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'name': 'test_metadata',
'version': '0.0.2',
'description': 'Simple package.json test for indexer',
'codeRepository':
'git+https://github.com/moranegg/metadata_test',
'author': [{
'type': 'Person',
'name': 'Morane G',
'email': 'moranegg@example.com',
}],
}
# when
result = MAPPINGS["NpmMapping"].translate(content)
# then
self.assertEqual(declared_metadata, result)
def test_extract_minimal_metadata_dict(self):
"""
Test the creation of a coherent minimal metadata set
"""
# given
metadata_list = [{
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'test_1',
'version': '0.0.2',
'description': 'Simple package.json test for indexer',
'codeRepository':
'git+https://github.com/moranegg/metadata_test',
}, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'test_0_1',
'version': '0.0.2',
'description': 'Simple package.json test for indexer',
'codeRepository':
'git+https://github.com/moranegg/metadata_test'
}, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'test_metadata',
'version': '0.0.2',
'author': 'moranegg',
}]
# when
results = extract_minimal_metadata_dict(metadata_list)
# then
expected_results = {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
"version": '0.0.2',
"description": 'Simple package.json test for indexer',
"name": ['test_1', 'test_0_1', 'test_metadata'],
"author": ['moranegg'],
"codeRepository":
'git+https://github.com/moranegg/metadata_test',
}
self.assertEqual(expected_results, results)
def test_index_content_metadata_npm(self):
"""
testing NPM with package.json
- one sha1 uses a file that can't be translated to metadata and
should return None in the translated metadata
"""
# given
sha1s = [
hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'),
hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607'),
hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069'),
]
# this metadata indexer computes only metadata for package.json
# in npm context with a hard mapping
metadata_indexer = ContentMetadataTestIndexer(
tool=TRANSLATOR_TOOL, config=BASE_TEST_CONFIG.copy())
fill_obj_storage(metadata_indexer.objstorage)
fill_storage(metadata_indexer.storage)
# when
metadata_indexer.run(sha1s, policy_update='ignore-dups')
results = list(metadata_indexer.idx_storage.content_metadata_get(
sha1s))
expected_results = [{
'translated_metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'codeRepository':
'git+https://github.com/moranegg/metadata_test',
'description': 'Simple package.json test for indexer',
'name': 'test_metadata',
'version': '0.0.1'
},
'id': hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5')
}, {
'translated_metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'issueTracker':
'https://github.com/npm/npm/issues',
'author': [{
'type': 'Person',
'name': 'Isaac Z. Schlueter',
'email': 'i@izs.me',
'url': 'http://blog.izs.me',
}],
'codeRepository':
'git+https://github.com/npm/npm',
'description': 'a package manager for JavaScript',
'license': 'https://spdx.org/licenses/Artistic-2.0',
'version': '5.0.3',
'name': 'npm',
'keywords': [
'install',
'modules',
'package manager',
'package.json'
],
'url': 'https://docs.npmjs.com/'
},
'id': hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607')
}, {
'translated_metadata': None,
'id': hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069')
}]
for result in results:
del result['tool']
# The assertion below returns False sometimes because of nested lists
self.assertEqual(expected_results, results)
def test_detect_metadata_package_json(self):
    """Only the ``package.json`` entry is reported, under ``NpmMapping``."""
    # given
    index_js = {
        'sha1_git': b'abc',
        'name': b'index.js',
        'target': b'abc',
        'length': 897,
        'status': 'visible',
        'type': 'file',
        'perms': 33188,
        'dir_id': b'dir_a',
        'sha1': b'bcd',
    }
    package_json = {
        'sha1_git': b'aab',
        'name': b'package.json',
        'target': b'aab',
        'length': 712,
        'status': 'visible',
        'type': 'file',
        'perms': 33188,
        'dir_id': b'dir_a',
        'sha1': b'cde',
    }

    # when
    detected = detect_metadata([index_js, package_json])

    # then
    self.assertEqual({'NpmMapping': [b'cde']}, detected)
def test_compute_metadata_valid_codemeta(self):
raw_content = (
b"""{
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"@type": "SoftwareSourceCode",
"identifier": "CodeMeta",
"description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.",
"name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD",
"codeRepository": "https://github.com/codemeta/codemeta",
"issueTracker": "https://github.com/codemeta/codemeta/issues",
"license": "https://spdx.org/licenses/Apache-2.0",
"version": "2.0",
"author": [
{
"@type": "Person",
"givenName": "Carl",
"familyName": "Boettiger",
"email": "cboettig@gmail.com",
"@id": "http://orcid.org/0000-0002-1642-628X"
},
{
"@type": "Person",
"givenName": "Matthew B.",
"familyName": "Jones",
"email": "jones@nceas.ucsb.edu",
"@id": "http://orcid.org/0000-0003-0077-4738"
}
],
"maintainer": {
"@type": "Person",
"givenName": "Carl",
"familyName": "Boettiger",
"email": "cboettig@gmail.com",
"@id": "http://orcid.org/0000-0002-1642-628X"
},
"contIntegration": "https://travis-ci.org/codemeta/codemeta",
"developmentStatus": "active",
"downloadUrl": "https://github.com/codemeta/codemeta/archive/2.0.zip",
"funder": {
"@id": "https://doi.org/10.13039/100000001",
"@type": "Organization",
"name": "National Science Foundation"
},
"funding":"1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software",
"keywords": [
"metadata",
"software"
],
"version":"2.0",
"dateCreated":"2017-06-05",
"datePublished":"2017-06-05",
"programmingLanguage": "JSON-LD"
}""") # noqa
expected_result = {
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"type": "SoftwareSourceCode",
"identifier": "CodeMeta",
"description":
"CodeMeta is a concept vocabulary that can "
"be used to standardize the exchange of software metadata "
"across repositories and organizations.",
"name":
"CodeMeta: Minimal metadata schemas for science "
"software and code, in JSON-LD",
"codeRepository": "https://github.com/codemeta/codemeta",
"issueTracker": "https://github.com/codemeta/codemeta/issues",
"license": "https://spdx.org/licenses/Apache-2.0",
"version": "2.0",
"author": [
{
"type": "Person",
"givenName": "Carl",
"familyName": "Boettiger",
"email": "cboettig@gmail.com",
"id": "http://orcid.org/0000-0002-1642-628X"
},
{
"type": "Person",
"givenName": "Matthew B.",
"familyName": "Jones",
"email": "jones@nceas.ucsb.edu",
"id": "http://orcid.org/0000-0003-0077-4738"
}
],
"maintainer": {
"type": "Person",
"givenName": "Carl",
"familyName": "Boettiger",
"email": "cboettig@gmail.com",
"id": "http://orcid.org/0000-0002-1642-628X"
},
"contIntegration": "https://travis-ci.org/codemeta/codemeta",
"developmentStatus": "active",
"downloadUrl":
"https://github.com/codemeta/codemeta/archive/2.0.zip",
"funder": {
"id": "https://doi.org/10.13039/100000001",
"type": "Organization",
"name": "National Science Foundation"
},
"funding": "1549758; Codemeta: A Rosetta Stone for Metadata "
"in Scientific Software",
"keywords": [
"metadata",
"software"
],
"version": "2.0",
"dateCreated": "2017-06-05",
"datePublished": "2017-06-05",
"programmingLanguage": "JSON-LD"
}
result = MAPPINGS["CodemetaMapping"].translate(raw_content)
self.assertEqual(result, expected_result)
def test_compute_metadata_maven(self):
raw_content = b"""
<project>
<name>Maven Default Project</name>
<modelVersion>4.0.0</modelVersion>
<groupId>com.mycompany.app</groupId>
<artifactId>my-app</artifactId>
<version>1.2.3</version>
<repositories>
<repository>
<id>central</id>
<name>Maven Repository Switchboard</name>
<layout>default</layout>
<url>http://repo1.maven.org/maven2</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
</project>"""
result = MAPPINGS["MavenMapping"].translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'name': 'Maven Default Project',
'identifier': 'com.mycompany.app',
'version': '1.2.3',
+ 'license': 'https://www.apache.org/licenses/LICENSE-2.0.txt',
'codeRepository':
'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
- })
+ })
+
+ def test_compute_metadata_maven_minimal(self):
+ raw_content = b"""
+ <project>
+ <name>Maven Default Project</name>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.mycompany.app</groupId>
+ <artifactId>my-app</artifactId>
+ <version>1.2.3</version>
+ </project>"""
+ result = MAPPINGS["MavenMapping"].translate(raw_content)
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ 'name': 'Maven Default Project',
+ 'identifier': 'com.mycompany.app',
+ 'version': '1.2.3',
+ 'codeRepository':
+ 'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
+ 'license': [],
+ })
+
+ def test_compute_metadata_maven_multiple(self):
+ '''Tests when there are multiple code repos and licenses.'''
+ raw_content = b"""
+ <project>
+ <name>Maven Default Project</name>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.mycompany.app</groupId>
+ <artifactId>my-app</artifactId>
+ <version>1.2.3</version>
+ <repositories>
+ <repository>
+ <id>central</id>
+ <name>Maven Repository Switchboard</name>
+ <layout>default</layout>
+ <url>http://repo1.maven.org/maven2</url>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>example</id>
+ <name>Example Maven Repo</name>
+ <layout>default</layout>
+ <url>http://example.org/maven2</url>
+ </repository>
+ </repositories>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ <license>
+ <name>MIT license</name>
+ <url>https://opensource.org/licenses/MIT</url>
+ </license>
+ </licenses>
+ </project>"""
+ result = MAPPINGS["MavenMapping"].translate(raw_content)
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ 'name': 'Maven Default Project',
+ 'identifier': 'com.mycompany.app',
+ 'version': '1.2.3',
+ 'license': [
+ 'https://www.apache.org/licenses/LICENSE-2.0.txt',
+ 'https://opensource.org/licenses/MIT',
+ ],
+ 'codeRepository': [
+ 'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
+ 'http://example.org/maven2/com/mycompany/app/my-app',
+ ]
+ })
def test_revision_metadata_indexer(self):
metadata_indexer = RevisionMetadataTestIndexer()
fill_obj_storage(metadata_indexer.objstorage)
fill_storage(metadata_indexer.storage)
tool = metadata_indexer.idx_storage.indexer_configuration_get(
{'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()})
assert tool is not None
metadata_indexer.idx_storage.content_metadata_add([{
'indexer_configuration_id': tool['id'],
'id': b'cde',
'translated_metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'issueTracker':
'https://github.com/librariesio/yarn-parser/issues',
'version': '1.0.0',
'name': 'yarn-parser',
'author': ['Andrew Nesbitt'],
'url':
'https://github.com/librariesio/yarn-parser#readme',
'processorRequirements': {'node': '7.5'},
'license': 'AGPL-3.0',
'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
'codeRepository':
'git+https://github.com/librariesio/yarn-parser.git',
'description':
'Tiny web service for parsing yarn.lock files',
}
}])
sha1_gits = [
hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
]
metadata_indexer.run(sha1_gits, 'update-dups')
results = list(metadata_indexer.idx_storage.revision_metadata_get(
sha1_gits))
expected_results = [{
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
'tool': TRANSLATOR_TOOL,
'translated_metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'url':
'https://github.com/librariesio/yarn-parser#readme',
'codeRepository':
'git+https://github.com/librariesio/yarn-parser.git',
'author': ['Andrew Nesbitt'],
'license': 'AGPL-3.0',
'version': '1.0.0',
'description':
'Tiny web service for parsing yarn.lock files',
'issueTracker':
'https://github.com/librariesio/yarn-parser/issues',
'name': 'yarn-parser',
'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
},
}]
for result in results:
del result['tool']['id']
# then
self.assertEqual(expected_results, results)
diff --git a/tox.ini b/tox.ini
index a2d8b63..8bc693c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,33 +1,33 @@
[tox]
envlist=flake8,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
pifpaf
commands =
- pifpaf run postgresql -- pytest --hypothesis-profile=fast --cov=swh --cov-branch {posargs}
+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=fast --cov=swh --cov-branch {posargs}
[testenv:py3-slow]
deps =
.[testing]
pytest-cov
pifpaf
commands =
- pifpaf run postgresql -- pytest --hypothesis-profile=slow --cov=swh --cov-branch {posargs}
+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=slow --cov=swh --cov-branch {posargs}
[testenv:py3-prop]
deps =
.[testing]
pytest-cov
pifpaf
commands =
- pifpaf run postgresql -- pytest --hypothesis-profile=fast -m property_based --disable-warnings
+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=fast -m property_based --disable-warnings
[testenv:flake8]
skip_install = true
deps =
flake8
commands =
{envpython} -m flake8
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 10:32 AM (4 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3212363
Attached To
rDCIDX Metadata indexer
Event Timeline
Log In to Comment