No OneTemporary
Actions

Size

34 KB

Subscribers

None

View Options

	diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
	index 7141c79..f4a6edc 100644
	--- a/swh/indexer/metadata_dictionary.py
	+++ b/swh/indexer/metadata_dictionary.py
	@@ -1,295 +1,352 @@
	# Copyright (C) 2017 The Software Heritage developers
	# See the AUTHORS file at the top-level directory of this distribution
	# License: GNU General Public License version 3, or any later version
	# See top-level LICENSE file for more information

	import os
	import re
	import abc
	import json
	import logging
	import xmltodict

	from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI
	from swh.indexer.codemeta import compact, expand


	MAPPINGS = {}


	def register_mapping(cls):
	    """Class decorator that registers a mapping under its class name.

	    An instance of ``cls`` (built with no arguments) is stored in the
	    module-level ``MAPPINGS`` dict, keyed by ``cls.__name__``.

	    Args:
	        cls: the mapping class to register.

	    Returns:
	        ``cls`` itself, so the function can be used as a decorator.
	    """
	    MAPPINGS[cls.__name__] = cls()
	    return cls


	class BaseMapping(metaclass=abc.ABCMeta):
	    """Base class for mappings to inherit from

	    To implement a new mapping:

	    - inherit this class
	    - override the detect_metadata_files and translate functions
	    """
	    def __init__(self):
	        # One logger per concrete mapping class: "<module>.<ClassName>".
	        self.log = logging.getLogger('%s.%s' % (
	            self.__class__.__module__,
	            self.__class__.__name__))

	    @abc.abstractmethod
	    def detect_metadata_files(self, files):
	        """
	        Detects files potentially containing metadata

	        Args:
	            files (list): list of files

	        Returns:
	            list: list of sha1 (possibly empty)
	        """
	        pass

	    @abc.abstractmethod
	    def translate(self, file_content):
	        """Translate the content of a metadata file.

	        Args:
	            file_content: content of the file to translate

	        Returns:
	            the translated metadata; what exactly is returned is
	            defined by each subclass.
	        """
	        pass

	    def normalize_translation(self, metadata):
	        """Return ``metadata`` passed through ``compact``."""
	        return compact(metadata)


	class SingleFileMapping(BaseMapping):
	    """Base class for all mappings that use a single file as input."""

	    @property
	    @abc.abstractmethod
	    def filename(self):
	        """Name of the file to extract metadata from.

	        It is compared for equality with the ``name`` of each entry
	        passed to :meth:`detect_metadata_files`.
	        """
	        pass

	    def detect_metadata_files(self, file_entries):
	        """Return the sha1 of the first entry named ``self.filename``.

	        Args:
	            file_entries (list): dicts with at least the keys ``name``
	                and ``sha1``

	        Returns:
	            list: a one-element list holding the ``sha1`` of the first
	            matching entry, or an empty list if no entry matches
	        """
	        for entry in file_entries:
	            if entry['name'] == self.filename:
	                return [entry['sha1']]
	        return []


	class DictMapping(BaseMapping):
	    """Base class for mappings that take as input a file that is mostly
	    a key-value store (eg. a shallow JSON dict)."""

	    @property
	    @abc.abstractmethod
	    def mapping(self):
	        """A translation dict to map dict keys into a canonical name."""
	        pass

	    def translate_dict(self, content_dict, *, normalize=True):
	        """
	        Translates content by parsing content from a dict object
	        and translating with the appropriate mapping

	        For each key ``k`` of the input, in order of preference:

	        - if the mapping defines a ``translate_<k>`` method, it is called
	          with the output dict and the value, and fills the output itself;
	        - else if ``k`` is in ``self.mapping``, the value (passed through
	          ``normalize_<k>`` when such a method exists) is stored under
	          ``self.mapping[k]``;
	        - otherwise the key is ignored.

	        Args:
	            content_dict (dict): content dict to translate
	            normalize (bool): whether to pass the result through
	                ``normalize_translation`` before returning it

	        Returns:
	            dict: translated metadata in json-friendly form needed for
	            the indexer

	        """
	        translated_metadata = {'@type': SCHEMA_URI + 'SoftwareSourceCode'}
	        for k, v in content_dict.items():
	            # First, check if there is a specific translation
	            # method for this key.
	            # 'translate_dict' is this very method, not a per-key hook:
	            # an input key named "dict" must not dispatch to it.
	            translation_method = None
	            if k != 'dict':
	                translation_method = getattr(self, 'translate_' + k, None)
	            if translation_method:
	                translation_method(translated_metadata, v)
	            elif k in self.mapping:
	                # if there is no method, but the key is known from the
	                # crosswalk table

	                # if there is a normalization method, use it on the value
	                normalization_method = getattr(self, 'normalize_' + k, None)
	                if normalization_method:
	                    v = normalization_method(v)

	                # set the translation metadata with the normalized value
	                translated_metadata[self.mapping[k]] = v
	        if normalize:
	            return self.normalize_translation(translated_metadata)
	        else:
	            return translated_metadata


	class JsonMapping(DictMapping, SingleFileMapping):
	    """Base class for all mappings that use a JSON file as input."""

	    def translate(self, raw_content):
	        """
	        Translates content by parsing content from a bytestring containing
	        json data and translating with the appropriate mapping

	        Args:
	            raw_content (bytes): raw content to translate

	        Returns:
	            dict: translated metadata in json-friendly form needed for
	            the indexer, or None if the content cannot be decoded, is not
	            valid JSON, or is not a JSON object

	        """
	        try:
	            raw_content = raw_content.decode()
	        except UnicodeDecodeError:
	            self.log.warning('Error unidecoding %r', raw_content)
	            return None
	        try:
	            content_dict = json.loads(raw_content)
	        except json.JSONDecodeError:
	            # Lazy %-args, like the warning above.
	            self.log.warning('Error unjsoning %r', raw_content)
	            return None
	        if not isinstance(content_dict, dict):
	            # Valid JSON whose top level is a list/string/number/...:
	            # there are no keys to translate.
	            self.log.warning('Expected a JSON object in %r', raw_content)
	            return None
	        return self.translate_dict(content_dict)


	@register_mapping
	class NpmMapping(JsonMapping):
	    """
	    dedicated class for NPM (package.json) mapping and translation
	    """
	    mapping = CROSSWALK_TABLE['NodeJS']
	    filename = b'package.json'

	    # Prefixes accepted in the short "<host>:<path>" repository notation.
	    _schema_shortcuts = {
	        'github': 'https://github.com/',
	        'gist': 'https://gist.github.com/',
	        'bitbucket': 'https://bitbucket.org/',
	        'gitlab': 'https://gitlab.com/',
	    }

	    def normalize_repository(self, d):
	        """https://docs.npmjs.com/files/package.json#repository

	        Args:
	            d: either a dict with string ``type`` and ``url`` entries, or
	                a string (full URL, ``<shortcut>:<path>``, or a bare path
	                which is taken to be relative to GitHub)

	        Returns:
	            dict: ``{'@id': url}``, or None if ``d`` has an unsupported
	            form
	        """
	        if isinstance(d, dict):
	            if not (isinstance(d.get('type'), str)
	                    and isinstance(d.get('url'), str)):
	                return None
	            url = '{type}+{url}'.format(**d)
	        elif isinstance(d, str):
	            if '://' in d:
	                url = d
	            elif ':' in d:
	                (schema, rest) = d.split(':', 1)
	                if schema in self._schema_shortcuts:
	                    url = self._schema_shortcuts[schema] + rest
	                else:
	                    return None
	            else:
	                url = self._schema_shortcuts['github'] + d

	        else:
	            return None

	        return {'@id': url}

	    def normalize_bugs(self, d):
	        """https://docs.npmjs.com/files/package.json#bugs

	        Args:
	            d: either a dict with a string ``url`` entry, or the URL
	                itself as a string

	        Returns:
	            dict: ``{'@id': url}``, or None if no URL can be found
	        """
	        if isinstance(d, dict) and isinstance(d.get('url'), str):
	            return {'@id': d['url']}
	        if isinstance(d, str):
	            return {'@id': d}
	        return None

	    # Parses the "Name <email> (url)" shorthand; email and url are
	    # both optional.
	    _parse_author = re.compile(r'^ *'
	                               r'(?P<name>.*?)'
	                               r'( +<(?P<email>.*)>)?'
	                               r'( +\((?P<url>.*)\))?'
	                               r' *$')

	    def normalize_author(self, d):
	        """https://docs.npmjs.com/files/package.json#people-fields-author-contributors

	        Args:
	            d: either a dict with optional ``name``, ``email`` and ``url``
	                entries, or a "Name <email> (url)" string

	        Returns:
	            dict: ``{'@list': [person]}`` where ``person`` carries the
	            fields that were found, or None if ``d`` cannot be parsed
	        """
	        author = {'@type': SCHEMA_URI+'Person'}
	        if isinstance(d, dict):
	            name = d.get('name', None)
	            email = d.get('email', None)
	            url = d.get('url', None)
	        elif isinstance(d, str):
	            match = self._parse_author.match(d)
	            if match is None:
	                # e.g. a string with embedded newlines
	                return None
	            name = match.group('name')
	            email = match.group('email')
	            url = match.group('url')
	        else:
	            return None
	        if name:
	            author[SCHEMA_URI+'name'] = name
	        if email:
	            author[SCHEMA_URI+'email'] = email
	        if url:
	            author[SCHEMA_URI+'url'] = {'@id': url}
	        return {"@list": [author]}

	    def normalize_license(self, s):
	        """Return the SPDX URL for license identifier ``s``.

	        Returns None when ``s`` is not a string.
	        """
	        if not isinstance(s, str):
	            return None
	        return {"@id": "https://spdx.org/licenses/" + s}

	    def normalize_homepage(self, s):
	        """Wrap homepage URL ``s`` as ``{'@id': s}``.

	        Returns None when ``s`` is not a string.
	        """
	        if not isinstance(s, str):
	            return None
	        return {"@id": s}


	@register_mapping
	class CodemetaMapping(SingleFileMapping):
	    """
	    dedicated class for CodeMeta (codemeta.json) mapping and translation
	    """
	    filename = b'codemeta.json'

	    def translate(self, content):
	        """Expand then re-compact the JSON document in ``content``.

	        Args:
	            content (bytes): raw content of a codemeta.json file

	        Returns:
	            the result of ``normalize_translation(expand(...))`` on the
	            parsed document, or None if ``content`` cannot be decoded or
	            is not valid JSON (same convention as ``JsonMapping``)
	        """
	        try:
	            document = json.loads(content.decode())
	        except UnicodeDecodeError:
	            self.log.warning('Error unidecoding %r', content)
	            return None
	        except json.JSONDecodeError:
	            self.log.warning('Error unjsoning %r', content)
	            return None
	        return self.normalize_translation(expand(document))


	@register_mapping
	class MavenMapping(DictMapping, SingleFileMapping):
	    """
	    dedicated class for Maven (pom.xml) mapping and translation
	    """
	    filename = b'pom.xml'
	    mapping = CROSSWALK_TABLE['Java (Maven)']

	    def translate(self, content):
	        """Translate the content of a pom.xml file.

	        Args:
	            content (bytes): raw content of the pom.xml file

	        Returns:
	            the normalized translation of the ``<project>`` element, or
	            None if ``content`` is not well-formed XML or has no
	            ``<project>`` element with children
	        """
	        from xml.parsers.expat import ExpatError

	        try:
	            d = xmltodict.parse(content).get('project')
	        except ExpatError:
	            self.log.warning('Error parsing XML from %r', content)
	            return None
	        if not isinstance(d, dict):
	            self.log.warning('No <project> element in %r', content)
	            return None
	        metadata = self.translate_dict(d, normalize=False)
	        metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
	        metadata[SCHEMA_URI+'license'] = self.parse_licenses(d)
	        return self.normalize_translation(metadata)

	    # Used when the pom.xml declares no repository.
	    _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'}

	    def parse_repositories(self, d):
	        """https://maven.apache.org/pom.html#Repositories

	        Args:
	            d (dict): the parsed ``<project>`` element

	        Returns:
	            list: one ``{'@id': url}`` per usable repository; the default
	            repository is used when ``<repositories>`` is absent or empty
	        """
	        repositories = d.get('repositories')
	        if repositories is None:
	            # Key absent, or an empty <repositories/> element.
	            return [self.parse_repository(d, self._default_repository)]
	        if not isinstance(repositories, dict):
	            return []
	        repositories = repositories.get('repository') or []
	        if not isinstance(repositories, list):
	            # A single <repository> is parsed as a dict, not a list.
	            repositories = [repositories]
	        parsed = (self.parse_repository(d, repo) for repo in repositories)
	        return [res for res in parsed if res]

	    def parse_repository(self, d, repo):
	        """Build the URL of the project inside repository ``repo``.

	        Args:
	            d (dict): the parsed ``<project>`` element
	            repo (dict): one parsed ``<repository>`` element

	        Returns:
	            dict: ``{'@id': url}`` where ``url`` is the repository URL
	            followed by the dot-separated parts of ``groupId`` and by
	            ``artifactId`` (each only if present), or None if the
	            repository has no URL or a non-default layout
	        """
	        # URLs always use '/', whatever the host platform's os.path does.
	        import posixpath

	        if not isinstance(repo, dict):
	            return None
	        if repo.get('layout', 'default') != 'default':
	            return None  # TODO ?
	        url = repo.get('url')
	        if not isinstance(url, str):
	            return None
	        group_id = d.get('groupId')
	        artifact_id = d.get('artifactId')
	        if isinstance(group_id, str) and group_id:
	            url = posixpath.join(url, *group_id.split('.'))
	        if isinstance(artifact_id, str) and artifact_id:
	            url = posixpath.join(url, artifact_id)
	        return {"@id": url}

	    def normalize_groupId(self, id_):
	        """Wrap the group id as ``{'@id': id_}``."""
	        return {"@id": id_}

	    def parse_licenses(self, d):
	        """https://maven.apache.org/pom.html#Licenses

	        The origin XML has the form:

	            <licenses>
	              <license>
	                <name>Apache License, Version 2.0</name>
	                <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
	              </license>
	            </licenses>

	        Which was translated to a dict by xmltodict and is given as `d`:

	        >>> d = {
	        ...     # ...
	        ...     "licenses": {
	        ...         "license": {
	        ...             "name": "Apache License, Version 2.0",
	        ...             "url":
	        ...             "https://www.apache.org/licenses/LICENSE-2.0.txt"
	        ...         }
	        ...     }
	        ... }
	        >>> MavenMapping().parse_licenses(d)
	        [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'}]

	        or, if there is more than one license:

	        >>> from pprint import pprint
	        >>> d = {
	        ...     # ...
	        ...     "licenses": {
	        ...         "license": [
	        ...             {
	        ...                 "name": "Apache License, Version 2.0",
	        ...                 "url":
	        ...                 "https://www.apache.org/licenses/LICENSE-2.0.txt"
	        ...             },
	        ...             {
	        ...                 "name": "MIT License, ",
	        ...                 "url": "https://opensource.org/licenses/MIT"
	        ...             }
	        ...         ]
	        ...     }
	        ... }
	        >>> pprint(MavenMapping().parse_licenses(d))
	        [{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'},
	         {'@id': 'https://opensource.org/licenses/MIT'}]

	        Licenses without a ``url`` are skipped, and an absent or empty
	        ``<licenses>`` element gives an empty list.
	        """

	        licenses = d.get('licenses')
	        if not isinstance(licenses, dict):
	            # Key absent, or an empty <licenses/> element (parsed as None).
	            return []
	        entries = licenses.get('license') or []
	        if isinstance(entries, dict):
	            # A single <license> is parsed as a dict, not a list.
	            entries = [entries]
	        return [{"@id": entry['url']} for entry in entries
	                if isinstance(entry, dict)
	                and isinstance(entry.get('url'), str)]
	+

	def main():
	    """Translate two sample package.json documents and print the results.

	    ``translate`` calls ``.decode()`` on its input, so both samples are
	    bytes; both are JSON, so both go through the NPM mapping.
	    """
	    raw_content = b"""{"name": "test_name", "unknown_term": "ut"}"""
	    raw_content1 = b"""{"name": "test_name",
	                        "unknown_term": "ut",
	                        "prerequisites" :"packageXYZ"}"""
	    result = MAPPINGS["NpmMapping"].translate(raw_content)
	    result1 = MAPPINGS["NpmMapping"].translate(raw_content1)

	    print(result)
	    print(result1)


	if __name__ == "__main__":
	    main()
	diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
	index f6f8878..bcc23aa 100644
	--- a/swh/indexer/tests/test_metadata.py
	+++ b/swh/indexer/tests/test_metadata.py
	@@ -1,499 +1,585 @@
	# Copyright (C) 2017-2018 The Software Heritage developers
	# See the AUTHORS file at the top-level directory of this distribution
	# License: GNU General Public License version 3, or any later version
	# See top-level LICENSE file for more information

	import unittest

	from swh.model.hashutil import hash_to_bytes

	from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
	from swh.indexer.metadata_detector import (
	detect_metadata, extract_minimal_metadata_dict
	)
	from swh.indexer.metadata import (
	ContentMetadataIndexer, RevisionMetadataIndexer
	)

	from .test_utils import (
	BASE_TEST_CONFIG, fill_obj_storage, fill_storage
	)


	TRANSLATOR_TOOL = {
	'name': 'swh-metadata-translator',
	'version': '0.0.2',
	'configuration': {
	'type': 'local',
	'context': 'NpmMapping'
	}
	}


	class ContentMetadataTestIndexer(ContentMetadataIndexer):
	    """Specific Metadata whose configuration is enough to satisfy the
	    indexing tests.
	    """
	    def parse_config_file(self, *args, **kwargs):
	        # Accept whatever arguments the base class passes; reaching this
	        # method at all is the error.
	        assert False, 'should not be called; the rev indexer configures it.'


	class RevisionMetadataTestIndexer(RevisionMetadataIndexer):
	    """Specific indexer whose configuration is enough to satisfy the
	    indexing tests.
	    """

	    # Content indexer class to use in place of the production one.
	    ContentMetadataIndexer = ContentMetadataTestIndexer

	    def parse_config_file(self, *args, **kwargs):
	        """Return the test configuration, ignoring all arguments."""
	        return {
	            **BASE_TEST_CONFIG,
	            'tools': TRANSLATOR_TOOL,
	        }


	class Metadata(unittest.TestCase):
	"""
	Tests metadata_mock_tool tool for Metadata detection
	"""
	def setUp(self):
	"""
	shows the entire diff in the results
	"""
	self.maxDiff = None

	def test_crosstable(self):
	self.assertEqual(CROSSWALK_TABLE['NodeJS'], {
	'repository': 'http://schema.org/codeRepository',
	'os': 'http://schema.org/operatingSystem',
	'cpu': 'http://schema.org/processorRequirements',
	'engines':
	'http://schema.org/processorRequirements',
	'author': 'http://schema.org/author',
	'author.email': 'http://schema.org/email',
	'author.name': 'http://schema.org/name',
	'contributor': 'http://schema.org/contributor',
	'keywords': 'http://schema.org/keywords',
	'license': 'http://schema.org/license',
	'version': 'http://schema.org/version',
	'description': 'http://schema.org/description',
	'name': 'http://schema.org/name',
	'bugs': 'https://codemeta.github.io/terms/issueTracker',
	'homepage': 'http://schema.org/url'
	})

	def test_compute_metadata_none(self):
	"""
	testing content empty content is empty
	should return None
	"""
	# given
	content = b""

	# None if no metadata was found or an error occurred
	declared_metadata = None
	# when
	result = MAPPINGS["NpmMapping"].translate(content)
	# then
	self.assertEqual(declared_metadata, result)

	def test_compute_metadata_npm(self):
	"""
	testing only computation of metadata with hard_mapping_npm
	"""
	# given
	content = b"""
	{
	"name": "test_metadata",
	"version": "0.0.2",
	"description": "Simple package.json test for indexer",
	"repository": {
	"type": "git",
	"url": "https://github.com/moranegg/metadata_test"
	},
	"author": {
	"email": "moranegg@example.com",
	"name": "Morane G"
	}
	}
	"""
	declared_metadata = {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'type': 'SoftwareSourceCode',
	'name': 'test_metadata',
	'version': '0.0.2',
	'description': 'Simple package.json test for indexer',
	'codeRepository':
	'git+https://github.com/moranegg/metadata_test',
	'author': [{
	'type': 'Person',
	'name': 'Morane G',
	'email': 'moranegg@example.com',
	}],
	}

	# when
	result = MAPPINGS["NpmMapping"].translate(content)
	# then
	self.assertEqual(declared_metadata, result)

	def test_extract_minimal_metadata_dict(self):
	"""
	Test the creation of a coherent minimal metadata set
	"""
	# given
	metadata_list = [{
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'name': 'test_1',
	'version': '0.0.2',
	'description': 'Simple package.json test for indexer',
	'codeRepository':
	'git+https://github.com/moranegg/metadata_test',
	}, {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'name': 'test_0_1',
	'version': '0.0.2',
	'description': 'Simple package.json test for indexer',
	'codeRepository':
	'git+https://github.com/moranegg/metadata_test'
	}, {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'name': 'test_metadata',
	'version': '0.0.2',
	'author': 'moranegg',
	}]

	# when
	results = extract_minimal_metadata_dict(metadata_list)

	# then
	expected_results = {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	"version": '0.0.2',
	"description": 'Simple package.json test for indexer',
	"name": ['test_1', 'test_0_1', 'test_metadata'],
	"author": ['moranegg'],
	"codeRepository":
	'git+https://github.com/moranegg/metadata_test',
	}
	self.assertEqual(expected_results, results)

	def test_index_content_metadata_npm(self):
	"""
	testing NPM with package.json
	- one sha1 uses a file that can't be translated to metadata and
	should return None in the translated metadata
	"""
	# given
	sha1s = [
	hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'),
	hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607'),
	hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069'),
	]
	# this metadata indexer computes only metadata for package.json
	# in npm context with a hard mapping
	metadata_indexer = ContentMetadataTestIndexer(
	tool=TRANSLATOR_TOOL, config=BASE_TEST_CONFIG.copy())
	fill_obj_storage(metadata_indexer.objstorage)
	fill_storage(metadata_indexer.storage)

	# when
	metadata_indexer.run(sha1s, policy_update='ignore-dups')
	results = list(metadata_indexer.idx_storage.content_metadata_get(
	sha1s))

	expected_results = [{
	'translated_metadata': {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'type': 'SoftwareSourceCode',
	'codeRepository':
	'git+https://github.com/moranegg/metadata_test',
	'description': 'Simple package.json test for indexer',
	'name': 'test_metadata',
	'version': '0.0.1'
	},
	'id': hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5')
	}, {
	'translated_metadata': {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'type': 'SoftwareSourceCode',
	'issueTracker':
	'https://github.com/npm/npm/issues',
	'author': [{
	'type': 'Person',
	'name': 'Isaac Z. Schlueter',
	'email': 'i@izs.me',
	'url': 'http://blog.izs.me',
	}],
	'codeRepository':
	'git+https://github.com/npm/npm',
	'description': 'a package manager for JavaScript',
	'license': 'https://spdx.org/licenses/Artistic-2.0',
	'version': '5.0.3',
	'name': 'npm',
	'keywords': [
	'install',
	'modules',
	'package manager',
	'package.json'
	],
	'url': 'https://docs.npmjs.com/'
	},
	'id': hash_to_bytes('d4c647f0fc257591cc9ba1722484229780d1c607')
	}, {
	'translated_metadata': None,
	'id': hash_to_bytes('02fb2c89e14f7fab46701478c83779c7beb7b069')
	}]

	for result in results:
	del result['tool']

	# The assertion below returns False sometimes because of nested lists
	self.assertEqual(expected_results, results)

	def test_detect_metadata_package_json(self):
	# given
	df = [{
	'sha1_git': b'abc',
	'name': b'index.js',
	'target': b'abc',
	'length': 897,
	'status': 'visible',
	'type': 'file',
	'perms': 33188,
	'dir_id': b'dir_a',
	'sha1': b'bcd'
	},
	{
	'sha1_git': b'aab',
	'name': b'package.json',
	'target': b'aab',
	'length': 712,
	'status': 'visible',
	'type': 'file',
	'perms': 33188,
	'dir_id': b'dir_a',
	'sha1': b'cde'
	}]
	# when
	results = detect_metadata(df)

	expected_results = {
	'NpmMapping': [
	b'cde'
	]
	}
	# then
	self.assertEqual(expected_results, results)

	def test_compute_metadata_valid_codemeta(self):
	raw_content = (
	b"""{
	"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
	"@type": "SoftwareSourceCode",
	"identifier": "CodeMeta",
	"description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.",
	"name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD",
	"codeRepository": "https://github.com/codemeta/codemeta",
	"issueTracker": "https://github.com/codemeta/codemeta/issues",
	"license": "https://spdx.org/licenses/Apache-2.0",
	"version": "2.0",
	"author": [
	{
	"@type": "Person",
	"givenName": "Carl",
	"familyName": "Boettiger",
	"email": "cboettig@gmail.com",
	"@id": "http://orcid.org/0000-0002-1642-628X"
	},
	{
	"@type": "Person",
	"givenName": "Matthew B.",
	"familyName": "Jones",
	"email": "jones@nceas.ucsb.edu",
	"@id": "http://orcid.org/0000-0003-0077-4738"
	}
	],
	"maintainer": {
	"@type": "Person",
	"givenName": "Carl",
	"familyName": "Boettiger",
	"email": "cboettig@gmail.com",
	"@id": "http://orcid.org/0000-0002-1642-628X"
	},
	"contIntegration": "https://travis-ci.org/codemeta/codemeta",
	"developmentStatus": "active",
	"downloadUrl": "https://github.com/codemeta/codemeta/archive/2.0.zip",
	"funder": {
	"@id": "https://doi.org/10.13039/100000001",
	"@type": "Organization",
	"name": "National Science Foundation"
	},
	"funding":"1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software",
	"keywords": [
	"metadata",
	"software"
	],
	"version":"2.0",
	"dateCreated":"2017-06-05",
	"datePublished":"2017-06-05",
	"programmingLanguage": "JSON-LD"
	}""") # noqa
	expected_result = {
	"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
	"type": "SoftwareSourceCode",
	"identifier": "CodeMeta",
	"description":
	"CodeMeta is a concept vocabulary that can "
	"be used to standardize the exchange of software metadata "
	"across repositories and organizations.",
	"name":
	"CodeMeta: Minimal metadata schemas for science "
	"software and code, in JSON-LD",
	"codeRepository": "https://github.com/codemeta/codemeta",
	"issueTracker": "https://github.com/codemeta/codemeta/issues",
	"license": "https://spdx.org/licenses/Apache-2.0",
	"version": "2.0",
	"author": [
	{
	"type": "Person",
	"givenName": "Carl",
	"familyName": "Boettiger",
	"email": "cboettig@gmail.com",
	"id": "http://orcid.org/0000-0002-1642-628X"
	},
	{
	"type": "Person",
	"givenName": "Matthew B.",
	"familyName": "Jones",
	"email": "jones@nceas.ucsb.edu",
	"id": "http://orcid.org/0000-0003-0077-4738"
	}
	],
	"maintainer": {
	"type": "Person",
	"givenName": "Carl",
	"familyName": "Boettiger",
	"email": "cboettig@gmail.com",
	"id": "http://orcid.org/0000-0002-1642-628X"
	},
	"contIntegration": "https://travis-ci.org/codemeta/codemeta",
	"developmentStatus": "active",
	"downloadUrl":
	"https://github.com/codemeta/codemeta/archive/2.0.zip",
	"funder": {
	"id": "https://doi.org/10.13039/100000001",
	"type": "Organization",
	"name": "National Science Foundation"
	},
	"funding": "1549758; Codemeta: A Rosetta Stone for Metadata "
	"in Scientific Software",
	"keywords": [
	"metadata",
	"software"
	],
	"version": "2.0",
	"dateCreated": "2017-06-05",
	"datePublished": "2017-06-05",
	"programmingLanguage": "JSON-LD"
	}
	result = MAPPINGS["CodemetaMapping"].translate(raw_content)
	self.assertEqual(result, expected_result)

	def test_compute_metadata_maven(self):
	raw_content = b"""
	<project>
	<name>Maven Default Project</name>
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.mycompany.app</groupId>
	<artifactId>my-app</artifactId>
	<version>1.2.3</version>
	<repositories>
	<repository>
	<id>central</id>
	<name>Maven Repository Switchboard</name>
	<layout>default</layout>
	<url>http://repo1.maven.org/maven2</url>
	<snapshots>
	<enabled>false</enabled>
	</snapshots>
	</repository>
	</repositories>
	+ <licenses>
	+ <license>
	+ <name>Apache License, Version 2.0</name>
	+ <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
	+ <distribution>repo</distribution>
	+ <comments>A business-friendly OSS license</comments>
	+ </license>
	+ </licenses>
	</project>"""
	result = MAPPINGS["MavenMapping"].translate(raw_content)
	self.assertEqual(result, {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'type': 'SoftwareSourceCode',
	'name': 'Maven Default Project',
	'identifier': 'com.mycompany.app',
	'version': '1.2.3',
	+ 'license': 'https://www.apache.org/licenses/LICENSE-2.0.txt',
	'codeRepository':
	'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
	- })
	+ })
	+
	+ def test_compute_metadata_maven_minimal(self):
	+ raw_content = b"""
	+ <project>
	+ <name>Maven Default Project</name>
	+ <modelVersion>4.0.0</modelVersion>
	+ <groupId>com.mycompany.app</groupId>
	+ <artifactId>my-app</artifactId>
	+ <version>1.2.3</version>
	+ </project>"""
	+ result = MAPPINGS["MavenMapping"].translate(raw_content)
	+ self.assertEqual(result, {
	+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	+ 'type': 'SoftwareSourceCode',
	+ 'name': 'Maven Default Project',
	+ 'identifier': 'com.mycompany.app',
	+ 'version': '1.2.3',
	+ 'codeRepository':
	+ 'https://repo.maven.apache.org/maven2/com/mycompany/app/my-app',
	+ 'license': [],
	+ })
	+
	+ def test_compute_metadata_maven_multiple(self):
	+ '''Tests when there are multiple code repos and licenses.'''
	+ raw_content = b"""
	+ <project>
	+ <name>Maven Default Project</name>
	+ <modelVersion>4.0.0</modelVersion>
	+ <groupId>com.mycompany.app</groupId>
	+ <artifactId>my-app</artifactId>
	+ <version>1.2.3</version>
	+ <repositories>
	+ <repository>
	+ <id>central</id>
	+ <name>Maven Repository Switchboard</name>
	+ <layout>default</layout>
	+ <url>http://repo1.maven.org/maven2</url>
	+ <snapshots>
	+ <enabled>false</enabled>
	+ </snapshots>
	+ </repository>
	+ <repository>
	+ <id>example</id>
	+ <name>Example Maven Repo</name>
	+ <layout>default</layout>
	+ <url>http://example.org/maven2</url>
	+ </repository>
	+ </repositories>
	+ <licenses>
	+ <license>
	+ <name>Apache License, Version 2.0</name>
	+ <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
	+ <distribution>repo</distribution>
	+ <comments>A business-friendly OSS license</comments>
	+ </license>
	+ <license>
	+ <name>MIT license</name>
	+ <url>https://opensource.org/licenses/MIT</url>
	+ </license>
	+ </licenses>
	+ </project>"""
	+ result = MAPPINGS["MavenMapping"].translate(raw_content)
	+ self.assertEqual(result, {
	+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	+ 'type': 'SoftwareSourceCode',
	+ 'name': 'Maven Default Project',
	+ 'identifier': 'com.mycompany.app',
	+ 'version': '1.2.3',
	+ 'license': [
	+ 'https://www.apache.org/licenses/LICENSE-2.0.txt',
	+ 'https://opensource.org/licenses/MIT',
	+ ],
	+ 'codeRepository': [
	+ 'http://repo1.maven.org/maven2/com/mycompany/app/my-app',
	+ 'http://example.org/maven2/com/mycompany/app/my-app',
	+ ]
	+ })

	def test_revision_metadata_indexer(self):
	metadata_indexer = RevisionMetadataTestIndexer()
	fill_obj_storage(metadata_indexer.objstorage)
	fill_storage(metadata_indexer.storage)

	tool = metadata_indexer.idx_storage.indexer_configuration_get(
	{'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()})
	assert tool is not None

	metadata_indexer.idx_storage.content_metadata_add([{
	'indexer_configuration_id': tool['id'],
	'id': b'cde',
	'translated_metadata': {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'type': 'SoftwareSourceCode',
	'issueTracker':
	'https://github.com/librariesio/yarn-parser/issues',
	'version': '1.0.0',
	'name': 'yarn-parser',
	'author': ['Andrew Nesbitt'],
	'url':
	'https://github.com/librariesio/yarn-parser#readme',
	'processorRequirements': {'node': '7.5'},
	'license': 'AGPL-3.0',
	'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
	'codeRepository':
	'git+https://github.com/librariesio/yarn-parser.git',
	'description':
	'Tiny web service for parsing yarn.lock files',
	}
	}])

	sha1_gits = [
	hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
	]
	metadata_indexer.run(sha1_gits, 'update-dups')

	results = list(metadata_indexer.idx_storage.revision_metadata_get(
	sha1_gits))

	expected_results = [{
	'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
	'tool': TRANSLATOR_TOOL,
	'translated_metadata': {
	'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
	'url':
	'https://github.com/librariesio/yarn-parser#readme',
	'codeRepository':
	'git+https://github.com/librariesio/yarn-parser.git',
	'author': ['Andrew Nesbitt'],
	'license': 'AGPL-3.0',
	'version': '1.0.0',
	'description':
	'Tiny web service for parsing yarn.lock files',
	'issueTracker':
	'https://github.com/librariesio/yarn-parser/issues',
	'name': 'yarn-parser',
	'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
	},
	}]

	for result in results:
	del result['tool']['id']

	# then
	self.assertEqual(expected_results, results)
	diff --git a/tox.ini b/tox.ini
	index a2d8b63..8bc693c 100644
	--- a/tox.ini
	+++ b/tox.ini
	@@ -1,33 +1,33 @@
	[tox]
	envlist=flake8,py3

	[testenv:py3]
	deps =
	.[testing]
	pytest-cov
	pifpaf
	commands =
	- pifpaf run postgresql -- pytest --hypothesis-profile=fast --cov=swh --cov-branch {posargs}
	+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=fast --cov=swh --cov-branch {posargs}

	[testenv:py3-slow]
	deps =
	.[testing]
	pytest-cov
	pifpaf
	commands =
	- pifpaf run postgresql -- pytest --hypothesis-profile=slow --cov=swh --cov-branch {posargs}
	+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=slow --cov=swh --cov-branch {posargs}

	[testenv:py3-prop]
	deps =
	.[testing]
	pytest-cov
	pifpaf
	commands =
	- pifpaf run postgresql -- pytest --hypothesis-profile=fast -m property_based --disable-warnings
	+ pifpaf run postgresql -- pytest --doctest-modules --hypothesis-profile=fast -m property_based --disable-warnings

	[testenv:flake8]
	skip_install = true
	deps =
	flake8
	commands =
	{envpython} -m flake8

File Metadata

Mime Type: text/x-diff
Expires: Jul 4 2025, 10:32 AM (4 w, 5 d ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3212363

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions