# Review preamble (text before the first "diff --git" header is not part of
# the patch itself).
#
# Purpose: unified git diff over five test modules in
# swh/indexer/tests/storage/ (conftest.py, generate_data_test.py,
# test_api_client.py, test_converters.py, test_in_memory.py).
# The only +/- lines are copyright headers: a new four-line header plus a
# blank line in conftest.py and test_in_memory.py (hunks 120->125 and
# 16->21 lines), and a year-range bump to 2019 in the other three files
# (hunk sizes unchanged).  Everything else is unchanged context.
#
# NOTE(review): the line structure of this patch has been collapsed (hunk
# headers and file content share the same physical lines), so it would need
# its newlines restored before it could be applied.
#
# NOTE(review), context code in conftest.py: DataObj.__getattr__ forwards to
# dict.__getitem__, so looking up a missing attribute raises KeyError rather
# than AttributeError; hasattr() and getattr(obj, name, default) only handle
# AttributeError.  Worth confirming this is intended.
#
# NOTE(review), context code in test_in_memory.py: the swh_indexer_storage
# fixture is configured with 'cls': 'local' and the PostgreSQL dsn, exactly
# like the fixture in conftest.py -- confirm whether an in-memory backend was
# meant to be exercised here.
#
# NOTE(review), context code in test_api_client.py: the comment inside the
# swh_indexer_storage fixture speaks of the "swh_storage fixture" and a
# "local (in memory) storage", while the app fixture shown here builds a
# 'local' storage from the PostgreSQL dsn -- the comment looks stale.
#
# NOTE(review), context code in generate_data_test.py: MIMETYPE_OBJECTS and
# FOSSOLOGY_LICENSES hash uuid1() values, so their ids differ from run to
# run, and 'indexer_configuration_id' is hard-coded to 1 -- presumably the id
# of the first registered tool; verify against the fixture that adds TOOLS.
diff --git a/swh/indexer/tests/storage/conftest.py b/swh/indexer/tests/storage/conftest.py index 5d6f03c..d2f8f9f 100644 --- a/swh/indexer/tests/storage/conftest.py +++ b/swh/indexer/tests/storage/conftest.py @@ -1,120 +1,125 @@ +# Copyright (C) 2015-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + from os.path import join import pytest from . import SQL_DIR from swh.storage.tests.conftest import postgresql_fact from swh.indexer.storage import get_indexer_storage from swh.model.hashutil import hash_to_bytes from .generate_data_test import MIMETYPE_OBJECTS, FOSSOLOGY_LICENSES DUMP_FILES = join(SQL_DIR, '*.sql') TOOLS = [ { 'tool_name': 'universal-ctags', 'tool_version': '~git7859817b', 'tool_configuration': { "command_line": "ctags --fields=+lnz --sort=no --links=no " "--output-format=json "} }, { 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': {"type": "local", "context": "NpmMapping"}, }, { 'tool_name': 'swh-metadata-detector', 'tool_version': '0.0.1', 'tool_configuration': { "type": "local", "context": ["NpmMapping", "CodemetaMapping"]}, }, { 'tool_name': 'swh-metadata-detector2', 'tool_version': '0.0.1', 'tool_configuration': { "type": "local", "context": ["NpmMapping", "CodemetaMapping"]}, }, { 'tool_name': 'file', 'tool_version': '5.22', 'tool_configuration': {"command_line": "file --mime "}, }, { 'tool_name': 'pygments', 'tool_version': '2.0.1+dfsg-1.1+deb8u1', 'tool_configuration': { "type": "library", "debian-package": "python3-pygments"}, }, { 'tool_name': 'pygments2', 'tool_version': '2.0.1+dfsg-1.1+deb8u1', 'tool_configuration': { "type": "library", "debian-package": "python3-pygments", "max_content_size": 10240 }, }, { 'tool_name': 'nomos', 'tool_version': '3.1.0rc2-31-ga2cbb8c', 'tool_configuration': {"command_line": "nomossa "}, } 
] class DataObj(dict): def __getattr__(self, key): return self.__getitem__(key) def __setattr__(self, key, value): return self.__setitem__(key, value) @pytest.fixture def swh_indexer_storage_with_data(swh_indexer_storage): data = DataObj() tools = { tool['tool_name']: { 'id': tool['id'], 'name': tool['tool_name'], 'version': tool['tool_version'], 'configuration': tool['tool_configuration'], } for tool in swh_indexer_storage.indexer_configuration_add(TOOLS)} data.tools = tools data.sha1_1 = hash_to_bytes( '34973274ccef6ab4dfaaf86599792fa9c3fe4689') data.sha1_2 = hash_to_bytes( '61c2b3a30496d329e21af70dd2d7e097046d07b7') data.revision_id_1 = hash_to_bytes( '7026b7c1a2af56521e951c01ed20f255fa054238') data.revision_id_2 = hash_to_bytes( '7026b7c1a2af56521e9587659012345678904321') data.revision_id_3 = hash_to_bytes( '7026b7c1a2af56521e9587659012345678904320') data.origin_url_1 = 'file:///dev/0/zero' # 44434341 data.origin_url_2 = 'file:///dev/1/one' # 44434342 data.origin_url_3 = 'file:///dev/2/two' # 54974445 data.mimetypes = MIMETYPE_OBJECTS[:] swh_indexer_storage.content_mimetype_add( MIMETYPE_OBJECTS) data.fossology_licenses = FOSSOLOGY_LICENSES[:] swh_indexer_storage._test_data = data return (swh_indexer_storage, data) swh_indexer_storage_postgresql = postgresql_fact( 'postgresql_proc', dump_files=DUMP_FILES) @pytest.fixture def swh_indexer_storage(swh_indexer_storage_postgresql): storage_config = { 'cls': 'local', 'args': { 'db': swh_indexer_storage_postgresql.dsn, }, } return get_indexer_storage(**storage_config) diff --git a/swh/indexer/tests/storage/generate_data_test.py b/swh/indexer/tests/storage/generate_data_test.py index 26b4c18..5df332a 100644 --- a/swh/indexer/tests/storage/generate_data_test.py +++ b/swh/indexer/tests/storage/generate_data_test.py @@ -1,153 +1,153 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # 
License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from uuid import uuid1 from swh.model.hashutil import MultiHash from hypothesis.strategies import (composite, sets, one_of, uuids, tuples, sampled_from) MIMETYPES = [ b'application/json', b'application/octet-stream', b'application/xml', b'text/plain', ] ENCODINGS = [ b'iso8859-1', b'iso8859-15', b'latin1', b'utf-8', ] def gen_mimetype(): """Generate one mimetype strategy. """ return one_of(sampled_from(MIMETYPES)) def gen_encoding(): """Generate one encoding strategy. """ return one_of(sampled_from(ENCODINGS)) def _init_content(uuid): """Given a uuid, initialize a content """ return { 'id': MultiHash.from_data(uuid.bytes, {'sha1'}).digest()['sha1'], 'indexer_configuration_id': 1, } @composite def gen_content_mimetypes(draw, *, min_size=0, max_size=100): """Generate valid and consistent content_mimetypes. Context: Test purposes Args: **draw** (callable): Used by hypothesis to generate data **min_size** (int): Minimal number of elements to generate (default: 0) **max_size** (int): Maximal number of elements to generate (default: 100) Returns: List of content_mimetypes as expected by the content_mimetype_add api endpoint. 
""" _ids = draw( sets( tuples( uuids(), gen_mimetype(), gen_encoding() ), min_size=min_size, max_size=max_size ) ) content_mimetypes = [] for uuid, mimetype, encoding in _ids: content_mimetypes.append({ **_init_content(uuid), 'mimetype': mimetype, 'encoding': encoding, }) return content_mimetypes MIMETYPE_OBJECTS = [ {'id': MultiHash.from_data(uuid1().bytes, {'sha1'}).digest()['sha1'], 'indexer_configuration_id': 1, 'mimetype': mt, 'encoding': enc, } for mt in MIMETYPES for enc in ENCODINGS] LICENSES = [ b'3DFX', b'BSD', b'GPL', b'Apache2', b'MIT', ] FOSSOLOGY_LICENSES = [ {'id': MultiHash.from_data(uuid1().bytes, {'sha1'}).digest()['sha1'], 'indexer_configuration_id': 1, 'licenses': [LICENSES[i % len(LICENSES)], ], } for i in range(10) ] def gen_license(): return one_of(sampled_from(LICENSES)) @composite def gen_content_fossology_licenses(draw, *, min_size=0, max_size=100): """Generate valid and consistent content_fossology_licenses. Context: Test purposes Args: **draw** (callable): Used by hypothesis to generate data **min_size** (int): Minimal number of elements to generate (default: 0) **max_size** (int): Maximal number of elements to generate (default: 100) Returns: List of content_fossology_licenses as expected by the content_fossology_license_add api endpoint. 
""" _ids = draw( sets( tuples( uuids(), gen_license(), ), min_size=min_size, max_size=max_size ) ) content_licenses = [] for uuid, license in _ids: content_licenses.append({ **_init_content(uuid), 'licenses': [license], }) return content_licenses diff --git a/swh/indexer/tests/storage/test_api_client.py b/swh/indexer/tests/storage/test_api_client.py index cdb72fd..4fe100c 100644 --- a/swh/indexer/tests/storage/test_api_client.py +++ b/swh/indexer/tests/storage/test_api_client.py @@ -1,42 +1,42 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.indexer.storage.api.client import RemoteStorage import swh.indexer.storage.api.server as server from swh.indexer.storage import get_indexer_storage from .test_storage import * # noqa @pytest.fixture def app(swh_indexer_storage_postgresql): storage_config = { 'cls': 'local', 'args': { 'db': swh_indexer_storage_postgresql.dsn, }, } server.storage = get_indexer_storage(**storage_config) return server.app @pytest.fixture def swh_rpc_client_class(): # these are needed for the swh_indexer_storage_with_data fixture assert hasattr(RemoteStorage, 'indexer_configuration_add') assert hasattr(RemoteStorage, 'content_mimetype_add') return RemoteStorage @pytest.fixture def swh_indexer_storage(swh_rpc_client, app): # This version of the swh_storage fixture uses the swh_rpc_client fixture # to instantiate a RemoteStorage (see swh_rpc_client_class above) that # proxies, via the swh.core RPC mechanism, the local (in memory) storage # configured in the app fixture above. 
return swh_rpc_client diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py index 9c4cd88..0a5a0f7 100644 --- a/swh/indexer/tests/storage/test_converters.py +++ b/swh/indexer/tests/storage/test_converters.py @@ -1,188 +1,188 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from swh.indexer.storage import converters class TestConverters(unittest.TestCase): def setUp(self): self.maxDiff = None def test_ctags_to_db(self): input_ctag = { 'id': b'some-id', 'indexer_configuration_id': 100, 'ctags': [ { 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', }, { 'name': 'main', 'kind': 'function', 'line': 12, 'lang': 'Yaml', }, ] } expected_ctags = [ { 'id': b'some-id', 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', 'indexer_configuration_id': 100, }, { 'id': b'some-id', 'name': 'main', 'kind': 'function', 'line': 12, 'lang': 'Yaml', 'indexer_configuration_id': 100, }] # when actual_ctags = list(converters.ctags_to_db(input_ctag)) # then self.assertEqual(actual_ctags, expected_ctags) def test_db_to_ctags(self): input_ctags = { 'id': b'some-id', 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', 'tool_id': 200, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {} } expected_ctags = { 'id': b'some-id', 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', 'tool': { 'id': 200, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } # when actual_ctags = converters.db_to_ctags(input_ctags) # then self.assertEqual(actual_ctags, expected_ctags) def test_db_to_mimetype(self): input_mimetype = { 'id': b'some-id', 
'tool_id': 10, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {}, 'encoding': b'ascii', 'mimetype': b'text/plain', } expected_mimetype = { 'id': b'some-id', 'encoding': b'ascii', 'mimetype': b'text/plain', 'tool': { 'id': 10, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } actual_mimetype = converters.db_to_mimetype(input_mimetype) self.assertEqual(actual_mimetype, expected_mimetype) def test_db_to_language(self): input_language = { 'id': b'some-id', 'tool_id': 20, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {}, 'lang': b'css', } expected_language = { 'id': b'some-id', 'lang': b'css', 'tool': { 'id': 20, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } actual_language = converters.db_to_language(input_language) self.assertEqual(actual_language, expected_language) def test_db_to_fossology_license(self): input_license = { 'id': b'some-id', 'tool_id': 20, 'tool_name': 'nomossa', 'tool_version': '5.22', 'tool_configuration': {}, 'licenses': ['GPL2.0'], } expected_license = { 'licenses': ['GPL2.0'], 'tool': { 'id': 20, 'name': 'nomossa', 'version': '5.22', 'configuration': {}, } } actual_license = converters.db_to_fossology_license(input_license) self.assertEqual(actual_license, expected_license) def test_db_to_metadata(self): input_metadata = { 'id': b'some-id', 'tool_id': 20, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {}, 'metadata': b'metadata', } expected_metadata = { 'id': b'some-id', 'metadata': b'metadata', 'tool': { 'id': 20, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } actual_metadata = converters.db_to_metadata(input_metadata) self.assertEqual(actual_metadata, expected_metadata) diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py index a7c4710..d49a079 100644 --- 
a/swh/indexer/tests/storage/test_in_memory.py +++ b/swh/indexer/tests/storage/test_in_memory.py @@ -1,16 +1,21 @@ +# Copyright (C) 2015-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + import pytest from swh.indexer.storage import get_indexer_storage from .test_storage import * # noqa @pytest.fixture def swh_indexer_storage(swh_indexer_storage_postgresql): storage_config = { 'cls': 'local', 'args': { 'db': swh_indexer_storage_postgresql.dsn, }, } return get_indexer_storage(**storage_config)