Page Menu | Home | Software Heritage

No One | Temporary

diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
index 7141c79..f4a6edc 100644
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -1,295 +1,352 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import re
import abc
import json
import logging
import xmltodict
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
from swh.indexer.codemeta import compact, expand
# Registry of mapping instances, keyed by the name of their class.
MAPPINGS = {}


def register_mapping(cls):
    """Class decorator: instantiate ``cls`` and record it in ``MAPPINGS``.

    The instance is stored under ``cls.__name__``.  The class itself is
    returned unchanged, so the decorated name still refers to the class.
    """
    instance = cls()
    MAPPINGS[cls.__name__] = instance
    return cls
class BaseMapping(metaclass=abc.ABCMeta):
    """Abstract parent of every metadata mapping.

    A concrete mapping subclasses this and provides both
    ``detect_metadata_files`` and ``translate``.
    """

    def __init__(self):
        # One logger per concrete mapping, named "<module>.<class name>".
        klass = self.__class__
        self.log = logging.getLogger(
            '%s.%s' % (klass.__module__, klass.__name__))

    @abc.abstractmethod
    def detect_metadata_files(self, files):
        """Detect the files that potentially contain metadata.

        Args:
            files (list): list of files

        Returns:
            list: list of sha1 (possibly empty)
        """

    @abc.abstractmethod
    def translate(self, file_content):
        """Translate the content of a metadata file (mapping-specific)."""

    def normalize_translation(self, metadata):
        """Return ``metadata`` after passing it through ``compact``."""
        normalized = compact(metadata)
        return normalized
class SingleFileMapping(BaseMapping):
    """Parent of the mappings whose input is exactly one file."""

    @property
    @abc.abstractmethod
    def filename(self):
        """Name of the file to extract metadata from."""

    def detect_metadata_files(self, file_entries):
        """Find the entry whose ``'name'`` equals ``self.filename``.

        Args:
            file_entries: iterable of dicts with ``'name'`` and ``'sha1'``
                keys.

        Returns:
            list: ``[sha1]`` of the first matching entry, or ``[]`` when no
            entry matches.
        """
        for candidate in file_entries:
            if candidate['name'] != self.filename:
                continue
            # Only the first match is reported.
            return [candidate['sha1']]
        return []
class DictMapping(BaseMapping):
    """Base class for mappings that take as input a file that is mostly
    a key-value store (eg. a shallow JSON dict)."""

    # Attributes that match the ``translate_``/``normalize_`` prefixes but
    # belong to the mapping machinery itself.  Without this guard an input
    # key named "dict" would resolve to ``translate_dict`` and be called
    # with the wrong arguments.
    _reserved_hook_names = frozenset(
        ['translate_dict', 'normalize_translation'])

    @property
    @abc.abstractmethod
    def mapping(self):
        """A translation dict to map dict keys into a canonical name."""

    def _find_hook(self, prefix, key):
        """Return the callable attribute ``<prefix><key>`` of self, or None.

        Non-string keys, reserved names and non-callable attributes never
        yield a hook.
        """
        if not isinstance(key, str):
            return None
        name = prefix + key
        if name in self._reserved_hook_names:
            return None
        hook = getattr(self, name, None)
        return hook if callable(hook) else None

    def translate_dict(self, content_dict, *, normalize=True):
        """
        Translates content by parsing content from a dict object
        and translating with the appropriate mapping

        For each key ``k`` of ``content_dict``:

        - if the instance has a ``translate_<k>`` method, it is called with
          the output dict and the value, and is responsible for filling
          the output itself;
        - otherwise, if ``k`` is in ``self.mapping``, the value (first passed
          through ``normalize_<k>`` when such a method exists) is stored
          under ``self.mapping[k]``;
        - otherwise the key is ignored.

        Args:
            content_dict (dict): content dict to translate
            normalize (bool): when true (the default), the result goes
                through ``normalize_translation`` before being returned

        Returns:
            dict: translated metadata in json-friendly form needed for
            the indexer

        """
        translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'}
        for k, v in content_dict.items():
            # First, check if there is a specific translation
            # method for this key
            translation_method = self._find_hook('translate_', k)
            if translation_method:
                translation_method(translated_metadata, v)
            elif k in self.mapping:
                # if there is no method, but the key is known from the
                # crosswalk table

                # if there is a normalization method, use it on the value
                normalization_method = self._find_hook('normalize_', k)
                if normalization_method:
                    v = normalization_method(v)

                # set the translation metadata with the normalized value
                translated_metadata[self.mapping[k]] = v
        if normalize:
            return self.normalize_translation(translated_metadata)
        return translated_metadata
class JsonMapping(DictMapping, SingleFileMapping):
    """Base class for all mappings that use a JSON file as input."""

    def translate(self, raw_content):
        """
        Translates content by parsing content from a bytestring containing
        json data and translating with the appropriate mapping

        Args:
            raw_content (bytes): raw content to translate

        Returns:
            dict: translated metadata in json-friendly form needed for
            the indexer, or None when the content cannot be decoded, is
            not valid JSON, or is valid JSON whose top-level value is not
            an object.

        """
        try:
            raw_content = raw_content.decode()
        except UnicodeDecodeError:
            self.log.warning('Error unidecoding %r', raw_content)
            return None
        try:
            content_dict = json.loads(raw_content)
        except json.JSONDecodeError:
            self.log.warning('Error unjsoning %r', raw_content)
            return None
        if not isinstance(content_dict, dict):
            # translate_dict() iterates over .items(); a JSON list, string
            # or number at top level carries no key/value metadata.
            self.log.warning('Unexpected non-object JSON in %r', raw_content)
            return None
        return self.translate_dict(content_dict)
@register_mapping
class NpmMapping(JsonMapping):
    """
    dedicated class for NPM (package.json) mapping and translation

    Every ``normalize_*`` method returns None when the value does not have
    one of the shapes it knows how to handle, instead of raising.
    """
    mapping = CROSSWALK_TABLE['NodeJS']
    filename = b'package.json'

    # Prefixes accepted in the "<shortcut>:<path>" form of `repository`.
    _schema_shortcuts = {
        'github': 'https://github.com/',
        'gist': 'https://gist.github.com/',
        'bitbucket': 'https://bitbucket.org/',
        'gitlab': 'https://gitlab.com/',
    }

    def normalize_repository(self, d):
        """https://docs.npmjs.com/files/package.json#repository

        Accepts either a dict with string ``type`` and ``url`` entries, a
        full URL, a ``<shortcut>:<path>`` string using one of
        ``_schema_shortcuts``, or a bare path (prefixed with the GitHub
        URL).  Returns ``{'@id': url}`` or None.
        """
        if isinstance(d, dict):
            # A dict missing either entry used to raise KeyError.
            if not (isinstance(d.get('type'), str)
                    and isinstance(d.get('url'), str)):
                return None
            url = '{type}+{url}'.format(**d)
        elif isinstance(d, str):
            if '://' in d:
                url = d
            elif ':' in d:
                (schema, rest) = d.split(':', 1)
                if schema in self._schema_shortcuts:
                    url = self._schema_shortcuts[schema] + rest
                else:
                    return None
            else:
                url = self._schema_shortcuts['github'] + d
        else:
            return None

        return {'@id': url}

    def normalize_bugs(self, d):
        """https://docs.npmjs.com/files/package.json#bugs

        Accepts a dict with a string ``url`` entry, or the URL itself as a
        string.  Returns ``{'@id': url}`` or None.
        """
        if isinstance(d, dict) and isinstance(d.get('url'), str):
            return {'@id': '{url}'.format(**d)}
        if isinstance(d, str):
            return {'@id': d}
        return None

    # "Name <email> (url)", where the email and url parts are optional.
    _parse_author = re.compile(r'^ *'
                               r'(?P<name>.*?)'
                               r'( +<(?P<email>.*)>)?'
                               r'( +\((?P<url>.*)\))?'
                               r' *$')

    def normalize_author(self, d):
        """https://docs.npmjs.com/files/package.json#people-fields-author-contributors

        Accepts a dict with optional ``name``, ``email`` and ``url``
        entries, or a "Name <email> (url)" string.  Returns a one-element
        ``@list`` holding the Person, or None.
        """
        author = {'@type': SCHEMA_URI+'Person'}
        if isinstance(d, dict):
            name = d.get('name', None)
            email = d.get('email', None)
            url = d.get('url', None)
        elif isinstance(d, str):
            match = self._parse_author.match(d)
            if match is None:
                # '.' does not match newlines, so a multi-line string
                # does not match the pattern at all.
                return None
            name = match.group('name')
            email = match.group('email')
            url = match.group('url')
        else:
            return None
        if name and isinstance(name, str):
            author[SCHEMA_URI+'name'] = name
        if email and isinstance(email, str):
            author[SCHEMA_URI+'email'] = email
        if url and isinstance(url, str):
            author[SCHEMA_URI+'url'] = {'@id': url}
        return {"@list": [author]}

    def normalize_license(self, s):
        """Turn a license identifier string into an spdx.org URL node."""
        if not isinstance(s, str):
            # Any non-string value cannot be appended to the URL prefix.
            return None
        return {"@id": "https://spdx.org/licenses/" + s}

    def normalize_homepage(self, s):
        """Wrap the homepage URL string into an ``@id`` node."""
        if not isinstance(s, str):
            return None
        return {"@id": s}
@register_mapping
class CodemetaMapping(SingleFileMapping):
    """
    dedicated class for CodeMeta (codemeta.json) mapping and translation
    """
    filename = b'codemeta.json'

    def translate(self, content):
        """Expand then normalize the JSON document held in ``content``.

        Args:
            content (bytes): raw content of the codemeta.json file

        Returns:
            The result of ``normalize_translation(expand(document))``, or
            None (after logging a warning) when ``content`` cannot be
            decoded or is not valid JSON, as ``JsonMapping.translate``
            does.
        """
        try:
            document = json.loads(content.decode())
        except UnicodeDecodeError:
            self.log.warning('Error unidecoding %r', content)
            return None
        except json.JSONDecodeError:
            self.log.warning('Error unjsoning %r', content)
            return None
        return self.normalize_translation(expand(document))
@register_mapping
class MavenMapping(DictMapping, SingleFileMapping):
    """
    dedicated class for Maven (pom.xml) mapping and translation
    """
    filename = b'pom.xml'
    mapping = CROSSWALK_TABLE['Java (Maven)']

    def translate(self, content):
        """Translate the content of a pom.xml file.

        Args:
            content (bytes): raw content of the pom.xml file

        Returns:
            dict: the normalized metadata, including the code repositories
            and licenses, or None when ``content`` is not well-formed XML
            or has no non-empty ``<project>`` root element.
        """
        # Local import: only this method needs the exception type.
        from xml.parsers.expat import ExpatError

        try:
            d = xmltodict.parse(content).get('project')
        except ExpatError:
            self.log.warning('Error parsing XML from %r', content)
            return None
        if not isinstance(d, dict):
            # Either the root element is not <project>, or it is empty.
            return None
        metadata = self.translate_dict(d, normalize=False)
        metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
        metadata[SCHEMA_URI+'license'] = self.parse_licenses(d)
        return self.normalize_translation(metadata)

    # Repository used when the POM does not declare any.
    _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'}

    def parse_repositories(self, d):
        """https://maven.apache.org/pom.html#Repositories

        Returns the list of ``{'@id': url}`` nodes for the repositories of
        the project dict ``d``; the default repository is used when ``d``
        has no ``repositories`` key at all.
        """
        if 'repositories' not in d:
            return [self.parse_repository(d, self._default_repository)]

        container = d['repositories']
        if not isinstance(container, dict):
            # e.g. an empty <repositories/> element, parsed as None
            return []
        repositories = container.get('repository', [])
        if not isinstance(repositories, list):
            # a single <repository> is not wrapped in a list by xmltodict
            repositories = [repositories]
        results = []
        for repo in repositories:
            res = self.parse_repository(d, repo)
            if res:
                results.append(res)
        return results

    def parse_repository(self, d, repo):
        """Build the URL of the project ``d`` inside the repository ``repo``.

        The groupId (split on dots) and the artifactId are appended to the
        repository URL as path components; missing or empty ones are
        skipped.  Returns ``{'@id': url}``, or None when ``repo`` is not a
        dict, has a non-default layout, or has no string ``url``.
        """
        if not isinstance(repo, dict):
            return None
        if repo.get('layout', 'default') != 'default':
            return None  # TODO ?
        url = repo.get('url')
        if not isinstance(url, str):
            return None

        components = []
        group_id = d.get('groupId')
        if isinstance(group_id, str):
            components.extend(part for part in group_id.split('.') if part)
        artifact_id = d.get('artifactId')
        if isinstance(artifact_id, str) and artifact_id:
            components.append(artifact_id)
        if components:
            # URLs always use '/', whatever os.sep is on the host.
            url = '/'.join([url.rstrip('/')] + components)
        return {"@id": url}

    def normalize_groupId(self, id_):
        """Wrap the groupId into an ``@id`` node."""
        return {"@id": id_}

    def parse_licenses(self, d):
        """https://maven.apache.org/pom.html#Licenses

        The origin XML has the form:

            <licenses>
              <license>
                <name>Apache License, Version 2.0</name>
                <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
              </license>
            </licenses>

        Which was translated to a dict by xmltodict and is given as `d`:

        >>> d = {
        ...     # ...
        ...     "licenses": {
        ...         "license": {
        ...             "name": "Apache License, Version 2.0",
        ...             "url":
        ...             "https://www.apache.org/licenses/LICENSE-2.0.txt"
        ...         }
        ...     }
        ... }
        >>> MavenMapping().parse_licenses(d)
        [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'}]

        or, if there are more than one license:

        >>> from pprint import pprint
        >>> d = {
        ...     # ...
        ...     "licenses": {
        ...         "license": [
        ...             {
        ...                 "name": "Apache License, Version 2.0",
        ...                 "url":
        ...                 "https://www.apache.org/licenses/LICENSE-2.0.txt"
        ...             },
        ...             {
        ...                 "name": "MIT License, ",
        ...                 "url": "https://opensource.org/licenses/MIT"
        ...             }
        ...         ]
        ...     }
        ... }
        >>> pprint(MavenMapping().parse_licenses(d))
        [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'},
         {'@id': 'https://opensource.org/licenses/MIT'}]

        Entries without a string ``url`` are skipped, and an absent or
        empty ``licenses`` element yields an empty list.
        """
        container = d.get('licenses')
        if not isinstance(container, dict):
            # absent, or an empty <licenses/> element parsed as None
            return []
        entries = container.get('license') or []
        if not isinstance(entries, list):
            # a single <license> is not wrapped in a list by xmltodict
            entries = [entries]
        return [{"@id": entry['url']}
                for entry in entries
                if isinstance(entry, dict)
                and isinstance(entry.get('url'), str)]
def main():
    """Print the translation of one sample per mapping family.

    The NPM sample is passed as bytes because ``translate`` decodes its
    input, and the Maven mapping gets an XML sample because it parses its
    input as a pom.xml document.
    """
    raw_content = b"""{"name": "test_name",
                       "unknown_term": "ut",
                       "prerequisites" :"packageXYZ"}"""
    raw_content1 = b"""<project>
                         <name>test_name</name>
                         <groupId>com.example</groupId>
                         <artifactId>test-name</artifactId>
                       </project>"""
    result = MAPPINGS["NpmMapping"].translate(raw_content)
    result1 = MAPPINGS["MavenMapping"].translate(raw_content1)
    print(result)
    print(result1)


if __name__ == "__main__":
    main()
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
index f6f8878..bcc23aa 100644
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1,499 +1,585 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
from swh.model.hashutil import hash_to_bytes
from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
from swh.indexer.metadata_detector import (
detect_metadata, extract_minimal_metadata_dict
)
from swh.indexer.metadata import (
ContentMetadataIndexer, RevisionMetadataIndexer
)
from .test_utils import (
BASE_TEST_CONFIG, fill_obj_storage, fill_storage
)
# Tool description shared by the tests below: it is passed as `tool=` to the
# content indexer and as the 'tools' entry of the revision indexer config.
TRANSLATOR_TOOL = {
    'name': 'swh-metadata-translator',
    'version': '0.0.2',
    'configuration': {
        'type': 'local',
        'context': 'NpmMapping'
    }
}
class ContentMetadataTestIndexer(ContentMetadataIndexer):
    """Content metadata indexer whose configuration is enough to satisfy
    the indexing tests.
    """

    def parse_config_file(self, *args, **kwargs):
        """Fail loudly: this indexer must never read its own configuration.

        Raises:
            AssertionError: always.  An explicit raise is used because an
            ``assert False`` statement is removed when Python runs with -O.
        """
        raise AssertionError(
            'should not be called; the rev indexer configures it.')
class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
    """Revision metadata indexer configured for the indexing tests."""

    # Class attribute pointing at the test flavour of the content indexer.
    ContentMetadataIndexer = ContentMetadataTestIndexer

    def parse_config_file(self, *args, **kwargs):
        """Return the base test configuration plus the translator tool."""
        config = dict(BASE_TEST_CONFIG)
        config['tools'] = TRANSLATOR_TOOL
        return config
class Metadata(unittest.TestCase):
    """Tests for metadata detection and translation."""

    def setUp(self):
        """Make assertion failures show the entire diff."""
        self.maxDiff = None

    def test_crosstable(self):
        """The NodeJS crosswalk table has the expected content."""
        expected_table = {
            'repository': 'http://schema.org/codeRepository',
            'os': 'http://schema.org/operatingSystem',
            'cpu': 'http://schema.org/processorRequirements',
            'engines':
                'http://schema.org/processorRequirements',
            'author': 'http://schema.org/author',
            'author.email': 'http://schema.org/email',
            'author.name': 'http://schema.org/name',
            'contributor': 'http://schema.org/contributor',
            'keywords': 'http://schema.org/keywords',
            'license': 'http://schema.org/license',
            'version': 'http://schema.org/version',
            'description': 'http://schema.org/description',
            'name': 'http://schema.org/name',
            'bugs': 'https://codemeta.github.io/terms/issueTracker',
            'homepage': 'http://schema.org/url'
        }
        self.assertEqual(CROSSWALK_TABLE['NodeJS'], expected_table)

    def test_compute_metadata_none(self):
        """Translating an empty content returns None."""
        # given
        empty_content = b""

        # None if no metadata was found or an error occurred
        expected = None

        # when
        actual = MAPPINGS["NpmMapping"].translate(empty_content)

        # then
        self.assertEqual(expected, actual)

    def test_compute_metadata_npm(self):
        """Translation of a small package.json by the NPM mapping."""
        # given
        package_json = b"""
            {
                "name": "test_metadata",
                "version": "0.0.2",
                "description": "Simple package.json test for indexer",
                "repository": {
                    "type": "git",
                    "url": "https://github.com/moranegg/metadata_test"
                },
                "author": {
                    "email": "moranegg@example.com",
                    "name": "Morane G"
                }
            }
        """
        expected = {
            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
            'type': 'SoftwareSourceCode',
            'name': 'test_metadata',
            'version': '0.0.2',
            'description': 'Simple package.json test for indexer',
            'codeRepository':
                'git+https://github.com/moranegg/metadata_test',
            'author': [{
                'type': 'Person',
                'name': 'Morane G',
                'email': 'moranegg@example.com',
            }],
        }

        # when
        actual = MAPPINGS["NpmMapping"].translate(package_json)

        # then
        self.assertEqual(expected, actual)

    def test_extract_minimal_metadata_dict(self):
        """A coherent minimal metadata set is built from several dicts."""
        # given
        context = 'https://doi.org/10.5063/schema/codemeta-2.0'
        repository = 'git+https://github.com/moranegg/metadata_test'
        description = 'Simple package.json test for indexer'
        metadata_list = [{
            '@context': context,
            'name': 'test_1',
            'version': '0.0.2',
            'description': description,
            'codeRepository': repository,
        }, {
            '@context': context,
            'name': 'test_0_1',
            'version': '0.0.2',
            'description': description,
            'codeRepository': repository,
        }, {
            '@context': context,
            'name': 'test_metadata',
            'version': '0.0.2',
            'author': 'moranegg',
        }]

        # when
        actual = extract_minimal_metadata_dict(metadata_list)

        # then
        expected = {
            '@context': context,
            "version": '0.0.2',
            "description": description,
            "name": ['test_1', 'test_0_1', 'test_metadata'],
            "author": ['moranegg'],
            "codeRepository": repository,
        }
        self.assertEqual(expected, actual)

    def test_index_content_metadata_npm(self):
        """Index three contents with the NPM (package.json) mapping.

        One sha1 uses a file that can't be translated to metadata and
        should return None in the translated metadata.
        """
        # given
        first_id = hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5')
        second_id = hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607')
        third_id = hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069')
        sha1s = [first_id, second_id, third_id]
        # this metadata indexer computes only metadata for package.json
        # in npm context with a hard mapping
        indexer = ContentMetadataTestIndexer(
            tool=TRANSLATOR_TOOL, config=BASE_TEST_CONFIG.copy())
        fill_obj_storage(indexer.objstorage)
        fill_storage(indexer.storage)

        # when
        indexer.run(sha1s, policy_update='ignore-dups')
        actual = list(indexer.idx_storage.content_metadata_get(sha1s))

        expected = [{
            'translated_metadata': {
                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                'type': 'SoftwareSourceCode',
                'codeRepository':
                    'git+https://github.com/moranegg/metadata_test',
                'description': 'Simple package.json test for indexer',
                'name': 'test_metadata',
                'version': '0.0.1'
            },
            'id': first_id
        }, {
            'translated_metadata': {
                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                'type': 'SoftwareSourceCode',
                'issueTracker':
                    'https://github.com/npm/npm/issues',
                'author': [{
                    'type': 'Person',
                    'name': 'Isaac Z. Schlueter',
                    'email': 'i@izs.me',
                    'url': 'http://blog.izs.me',
                }],
                'codeRepository':
                    'git+https://github.com/npm/npm',
                'description': 'a package manager for JavaScript',
                'license': 'https://spdx.org/licenses/Artistic-2.0',
                'version': '5.0.3',
                'name': 'npm',
                'keywords': [
                    'install',
                    'modules',
                    'package manager',
                    'package.json'
                ],
                'url': 'https://docs.npmjs.com/'
            },
            'id': second_id
        }, {
            'translated_metadata': None,
            'id': third_id
        }]

        # the tool description is not part of the comparison
        for entry in actual:
            del entry['tool']

        # The assertion below returns False sometimes because of nested lists
        self.assertEqual(expected, actual)

    def test_detect_metadata_package_json(self):
        """Only the package.json entry is detected, for the NPM mapping."""
        # given
        index_js = {
            'sha1_git': b'abc',
            'name': b'index.js',
            'target': b'abc',
            'length': 897,
            'status': 'visible',
            'type': 'file',
            'perms': 33188,
            'dir_id': b'dir_a',
            'sha1': b'bcd'
        }
        package_json = {
            'sha1_git': b'aab',
            'name': b'package.json',
            'target': b'aab',
            'length': 712,
            'status': 'visible',
            'type': 'file',
            'perms': 33188,
            'dir_id': b'dir_a',
            'sha1': b'cde'
        }

        # when
        actual = detect_metadata([index_js, package_json])

        expected = {
            'NpmMapping': [
                b'cde'
            ]
        }
        # then
        self.assertEqual(expected, actual)

    def test_compute_metadata_valid_codemeta(self):
        """A valid codemeta.json comes back in compacted form."""
        codemeta_json = (
            b"""{
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "@type": "SoftwareSourceCode",
            "identifier": "CodeMeta",
            "description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.",
            "name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD",
            "codeRepository": "https://github.com/codemeta/codemeta",
            "issueTracker": "https://github.com/codemeta/codemeta/issues",
            "license": "https://spdx.org/licenses/Apache-2.0",
            "version": "2.0",
            "author": [
              {
                "@type": "Person",
                "givenName": "Carl",
                "familyName": "Boettiger",
                "email": "cboettig@gmail.com",
                "@id": "http://orcid.org/0000-0002-1642-628X"
              },
              {
                "@type": "Person",
                "givenName": "Matthew B.",
                "familyName": "Jones",
                "email": "jones@nceas.ucsb.edu",
                "@id": "http://orcid.org/0000-0003-0077-4738"
              }
            ],
            "maintainer": {
              "@type": "Person",
              "givenName": "Carl",
              "familyName": "Boettiger",
              "email": "cboettig@gmail.com",
              "@id": "http://orcid.org/0000-0002-1642-628X"
            },
            "contIntegration": "https://travis-ci.org/codemeta/codemeta",
            "developmentStatus": "active",
            "downloadUrl": "https://github.com/codemeta/codemeta/archive/2.0.zip",
            "funder": {
                "@id": "https://doi.org/10.13039/100000001",
                "@type": "Organization",
                "name": "National Science Foundation"
            },
            "funding":"1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software",
            "keywords": [
              "metadata",
              "software"
            ],
            "version":"2.0",
            "dateCreated":"2017-06-05",
            "datePublished":"2017-06-05",
            "programmingLanguage": "JSON-LD"
          }""")  # noqa
        expected = {
            "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
            "type": "SoftwareSourceCode",
            "identifier": "CodeMeta",
            "description":
                "CodeMeta is a concept vocabulary that can "
                "be used to standardize the exchange of software metadata "
                "across repositories and organizations.",
            "name":
                "CodeMeta: Minimal metadata schemas for science "
                "software and code, in JSON-LD",
            "codeRepository": "https://github.com/codemeta/codemeta",
            "issueTracker": "https://github.com/codemeta/codemeta/issues",
            "license": "https://spdx.org/licenses/Apache-2.0",
            "version": "2.0",
            "author": [
                {
                    "type": "Person",
                    "givenName": "Carl",
                    "familyName": "Boettiger",
                    "email": "cboettig@gmail.com",
                    "id": "http://orcid.org/0000-0002-1642-628X"
                },
                {
                    "type": "Person",
                    "givenName": "Matthew B.",
                    "familyName": "Jones",
                    "email": "jones@nceas.ucsb.edu",
                    "id": "http://orcid.org/0000-0003-0077-4738"
                }
            ],
            "maintainer": {
                "type": "Person",
                "givenName": "Carl",
                "familyName": "Boettiger",
                "email": "cboettig@gmail.com",
                "id": "http://orcid.org/0000-0002-1642-628X"
            },
            "contIntegration": "https://travis-ci.org/codemeta/codemeta",
            "developmentStatus": "active",
            "downloadUrl":
                "https://github.com/codemeta/codemeta/archive/2.0.zip",
            "funder": {
                "id": "https://doi.org/10.13039/100000001",
                "type": "Organization",
                "name": "National Science Foundation"
            },
            "funding": "1549758; Codemeta: A Rosetta Stone for Metadata "
                       "in Scientific Software",
            "keywords": [
                "metadata",
                "software"
            ],
            "version": "2.0",
            "dateCreated": "2017-06-05",
            "datePublished": "2017-06-05",
            "programmingLanguage": "JSON-LD"
        }
        actual = MAPPINGS["CodemetaMapping"].translate(codemeta_json)
        self.assertEqual(actual, expected)

    def test_compute_metadata_maven(self):
        """A pom.xml with one repository and one license."""
        pom_xml = b"""
        <project>
          <name>Maven Default Project</name>
          <modelVersion>4.0.0</modelVersion>
          <groupId>com.mycompany.app</groupId>
          <artifactId>my-app</artifactId>
          <version>1.2.3</version>
          <repositories>
            <repository>
              <id>central</id>
              <name>Maven Repository Switchboard</name>
              <layout>default</layout>
              <url>http://repo1.maven.org/maven2</url>
              <snapshots>
                <enabled>false</enabled>
              </snapshots>
            </repository>
          </repositories>
          <licenses>
            <license>
              <name>Apache License, Version 2.0</name>
              <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
              <distribution>repo</distribution>
              <comments>A business-friendly OSS license</comments>
            </license>
          </licenses>
        </project>"""
        expected = {
            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
            'type': 'SoftwareSourceCode',
            'name': 'Maven Default Project',
            'identifier': 'com.mycompany.app',
            'version': '1.2.3',
            'license': 'https://www.apache.org/licenses/LICENSE-2.0.txt',
            'codeRepository':
                'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
        }
        actual = MAPPINGS["MavenMapping"].translate(pom_xml)
        self.assertEqual(actual, expected)

    def test_compute_metadata_maven_minimal(self):
        """A pom.xml declaring neither repositories nor licenses."""
        pom_xml = b"""
        <project>
          <name>Maven Default Project</name>
          <modelVersion>4.0.0</modelVersion>
          <groupId>com.mycompany.app</groupId>
          <artifactId>my-app</artifactId>
          <version>1.2.3</version>
        </project>"""
        expected = {
            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
            'type': 'SoftwareSourceCode',
            'name': 'Maven Default Project',
            'identifier': 'com.mycompany.app',
            'version': '1.2.3',
            'codeRepository':
            'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
            'license': [],
        }
        actual = MAPPINGS["MavenMapping"].translate(pom_xml)
        self.assertEqual(actual, expected)

    def test_compute_metadata_maven_multiple(self):
        '''Tests when there are multiple code repos and licenses.'''
        pom_xml = b"""
        <project>
          <name>Maven Default Project</name>
          <modelVersion>4.0.0</modelVersion>
          <groupId>com.mycompany.app</groupId>
          <artifactId>my-app</artifactId>
          <version>1.2.3</version>
          <repositories>
            <repository>
              <id>central</id>
              <name>Maven Repository Switchboard</name>
              <layout>default</layout>
              <url>http://repo1.maven.org/maven2</url>
              <snapshots>
                <enabled>false</enabled>
              </snapshots>
            </repository>
            <repository>
              <id>example</id>
              <name>Example Maven Repo</name>
              <layout>default</layout>
              <url>http://example.org/maven2</url>
            </repository>
          </repositories>
          <licenses>
            <license>
              <name>Apache License, Version 2.0</name>
              <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
              <distribution>repo</distribution>
              <comments>A business-friendly OSS license</comments>
            </license>
            <license>
              <name>MIT license</name>
              <url>https://opensource.org/licenses/MIT</url>
            </license>
          </licenses>
        </project>"""
        expected = {
            '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
            'type': 'SoftwareSourceCode',
            'name': 'Maven Default Project',
            'identifier': 'com.mycompany.app',
            'version': '1.2.3',
            'license': [
                'https://www.apache.org/licenses/LICENSE-2.0.txt',
                'https://opensource.org/licenses/MIT',
            ],
            'codeRepository': [
                'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
                'http://example.org/maven2/com/mycompany/app/my-app',
            ]
        }
        actual = MAPPINGS["MavenMapping"].translate(pom_xml)
        self.assertEqual(actual, expected)

    def test_revision_metadata_indexer(self):
        """Run the revision indexer on top of a pre-filled content entry."""
        indexer = RevisionMetadataTestIndexer()
        fill_obj_storage(indexer.objstorage)
        fill_storage(indexer.storage)

        tool_query = {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}
        tool = indexer.idx_storage.indexer_configuration_get(tool_query)
        assert tool is not None

        indexer.idx_storage.content_metadata_add([{
            'indexer_configuration_id': tool['id'],
            'id': b'cde',
            'translated_metadata': {
                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                'type': 'SoftwareSourceCode',
                'issueTracker':
                    'https://github.com/librariesio/yarn-parser/issues',
                'version': '1.0.0',
                'name': 'yarn-parser',
                'author': ['Andrew Nesbitt'],
                'url':
                    'https://github.com/librariesio/yarn-parser#readme',
                'processorRequirements': {'node': '7.5'},
                'license': 'AGPL-3.0',
                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
                'codeRepository':
                    'git+https://github.com/librariesio/yarn-parser.git',
                'description':
                    'Tiny web service for parsing yarn.lock files',
            }
        }])

        revision_id = hash_to_bytes(
            '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
        sha1_gits = [revision_id]
        indexer.run(sha1_gits, 'update-dups')

        actual = list(indexer.idx_storage.revision_metadata_get(sha1_gits))

        expected = [{
            'id': revision_id,
            'tool': TRANSLATOR_TOOL,
            'translated_metadata': {
                '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                'url':
                    'https://github.com/librariesio/yarn-parser#readme',
                'codeRepository':
                    'git+https://github.com/librariesio/yarn-parser.git',
                'author': ['Andrew Nesbitt'],
                'license': 'AGPL-3.0',
                'version': '1.0.0',
                'description':
                    'Tiny web service for parsing yarn.lock files',
                'issueTracker':
                    'https://github.com/librariesio/yarn-parser/issues',
                'name': 'yarn-parser',
                'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
            },
        }]

        # the storage-assigned tool id is not part of the comparison
        for entry in actual:
            del entry['tool']['id']

        # then
        self.assertEqual(expected, actual)
diff --git a/tox.ini b/tox.ini
index a2d8b63..8bc693c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,33 +1,33 @@
[tox]
envlist=flake8,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
pifpaf
commands =
- pifpaf run postgresql -- pytest --hypothesis-profile=fast --cov=swh --cov-branch {posargs}
+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=fast --cov=swh --cov-branch {posargs}
[testenv:py3-slow]
deps =
.[testing]
pytest-cov
pifpaf
commands =
- pifpaf run postgresql -- pytest --hypothesis-profile=slow --cov=swh --cov-branch {posargs}
+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=slow --cov=swh --cov-branch {posargs}
[testenv:py3-prop]
deps =
.[testing]
pytest-cov
pifpaf
commands =
- pifpaf run postgresql -- pytest --hypothesis-profile=fast -m property_based --disable-warnings
+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=fast -m property_based --disable-warnings
[testenv:flake8]
skip_install = true
deps =
flake8
commands =
{envpython} -m flake8

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 10:32 AM (4 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3212363

Event Timeline