diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -40,6 +40,8 @@
         from .api.client import RemoteStorage as IndexerStorage
     elif cls == 'local':
         from . import IndexerStorage
+    elif cls == 'memory':
+        from .in_memory import IndexerStorage
     else:
         raise ValueError('Unknown indexer storage class `%s`' % cls)
 
@@ -731,7 +733,7 @@
               (free form dict)
 
         Returns:
-            The identifier of the tool if it exists, None otherwise.
+            The same dictionary with an `id` key, None otherwise.
 
         """
         tool_conf = tool['tool_configuration']
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/storage/in_memory.py
@@ -0,0 +1,85 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import copy
+import json
+
+
+class IndexerStorage:
+    """In-memory SWH indexer storage.
+
+    Tools live in a plain dictionary owned by the instance; nothing is
+    persisted.
+
+    """
+
+    def __init__(self):
+        # Maps the key computed by _tool_key() to the stored tool dict.
+        self._tools = {}
+
+    def indexer_configuration_add(self, tools):
+        """Add new tools to the storage.
+
+        Args:
+            tools ([dict]): List of dictionaries representing the tools
+              to insert. Each dictionary has the following keys:
+
+              - **tool_name** (str): tool's name
+              - **tool_version** (str): tool's version
+              - **tool_configuration** (dict): tool's configuration
+                (free form dict, must be JSON-serializable)
+
+        Returns:
+            List of the inserted dicts (holding the `id` key as
+            well). The order of the list is not guaranteed to match
+            the order of the initial list.
+
+        """
+        inserted = []
+        for tool in tools:
+            # Deep copies keep the stored record isolated from both the
+            # caller's input dict and the dict handed back to the caller.
+            record = copy.deepcopy(tool)
+            record['id'] = self._tool_key(record)
+            self._tools[record['id']] = record
+            inserted.append(copy.deepcopy(record))
+        return inserted
+
+    def indexer_configuration_get(self, tool, db=None, cur=None):
+        """Retrieve tool information.
+
+        Args:
+            tool (dict): Dictionary representing a tool with the
+              following keys:
+
+              - **tool_name** (str): tool's name
+              - **tool_version** (str): tool's version
+              - **tool_configuration** (dict): tool's configuration
+                (free form dict, must be JSON-serializable)
+            db: Unused by this implementation.
+            cur: Unused by this implementation.
+
+        Returns:
+            A copy of the stored dictionary, with an `id` key, if the
+            tool was added before; None otherwise.
+
+        """
+        record = self._tools.get(self._tool_key(tool))
+        if record is None:
+            return None
+        # Hand out a copy so that callers mutating the result (e.g.
+        # `del tool['id']`) cannot corrupt the stored record.
+        return copy.deepcopy(record)
+
+    def _tool_key(self, tool):
+        """Compute the hashable key (also used as `id`) of a tool.
+
+        The configuration is serialized to JSON with sorted keys, so the
+        key is independent of dict ordering and stays hashable when the
+        configuration holds nested dicts or lists.
+
+        """
+        return (tool['tool_name'], tool['tool_version'],
+                json.dumps(tool['tool_configuration'], sort_keys=True))
diff --git a/swh/indexer/tests/storage/test_api_client.py b/swh/indexer/tests/storage/test_api_client.py
--- a/swh/indexer/tests/storage/test_api_client.py
+++ b/swh/indexer/tests/storage/test_api_client.py
@@ -10,11 +10,11 @@
 from swh.indexer.storage.api.client import RemoteStorage
 from swh.indexer.storage.api.server import app
 
-from .test_storage import CommonTestStorage
+from .test_storage import CommonTestStorage, BasePgTestStorage
 
 
 class TestRemoteStorage(CommonTestStorage, ServerTestFixture,
-                        unittest.TestCase):
+                        BasePgTestStorage, unittest.TestCase):
     """Test the indexer's remote storage API.
This class doesn't define any tests as we want identical diff --git a/swh/indexer/tests/storage/test_in_memory.py b/swh/indexer/tests/storage/test_in_memory.py new file mode 100644 --- /dev/null +++ b/swh/indexer/tests/storage/test_in_memory.py @@ -0,0 +1,179 @@ +from unittest import TestCase +import pytest + +from swh.indexer.storage.in_memory import IndexerStorage + +from .test_storage import CommonTestStorage, CommonPropBasedTestStorage + + +class IndexerTestInMemoryStorage(CommonTestStorage, TestCase): + def setUp(self): + super().setUp() + self.storage = IndexerStorage() + + @pytest.mark.xfail + def test_check_config(self): + pass + + @pytest.mark.xfail + def test_content_mimetype_missing(self): + pass + + @pytest.mark.xfail + def test_content_mimetype_add__drop_duplicate(self): + pass + + @pytest.mark.xfail + def test_content_mimetype_add__update_in_place_duplicate(self): + pass + + @pytest.mark.xfail + def test_content_mimetype_get(self): + pass + + @pytest.mark.xfail + def test_content_language_missing(self): + pass + + @pytest.mark.xfail + def test_content_language_get(self): + pass + + @pytest.mark.xfail + def test_content_language_add__drop_duplicate(self): + pass + + @pytest.mark.xfail + def test_content_language_add__update_in_place_duplicate(self): + pass + + @pytest.mark.xfail + def test_content_ctags_missing(self): + pass + + @pytest.mark.xfail + def test_content_ctags_get(self): + pass + + @pytest.mark.xfail + def test_content_ctags_search(self): + pass + + @pytest.mark.xfail + def test_content_ctags_search_no_result(self): + pass + + @pytest.mark.xfail + def test_content_ctags_add__add_new_ctags_added(self): + pass + + @pytest.mark.xfail + def test_content_ctags_add__update_in_place(self): + pass + + @pytest.mark.xfail + def test_content_fossology_license_get(self): + pass + + @pytest.mark.xfail + def test_content_fossology_license_add__new_license_added(self): + pass + + @pytest.mark.xfail + def 
test_content_fossology_license_add__update_in_place_duplicate(self): + pass + + @pytest.mark.xfail + def test_content_metadata_missing(self): + pass + + @pytest.mark.xfail + def test_content_metadata_get(self): + pass + + @pytest.mark.xfail + def test_content_metadata_add_drop_duplicate(self): + pass + + @pytest.mark.xfail + def test_content_metadata_add_update_in_place_duplicate(self): + pass + + @pytest.mark.xfail + def test_revision_metadata_missing(self): + pass + + @pytest.mark.xfail + def test_revision_metadata_get(self): + pass + + @pytest.mark.xfail + def test_revision_metadata_add_drop_duplicate(self): + pass + + @pytest.mark.xfail + def test_revision_metadata_add_update_in_place_duplicate(self): + pass + + @pytest.mark.xfail + def test_origin_intrinsic_metadata_get(self): + pass + + @pytest.mark.xfail + def test_origin_intrinsic_metadata_add_drop_duplicate(self): + pass + + @pytest.mark.xfail + def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self): + pass + + @pytest.mark.xfail + def test_origin_intrinsic_metadata_search_fulltext(self): + pass + + @pytest.mark.xfail + def test_origin_intrinsic_metadata_search_fulltext_rank(self): + pass + + @pytest.mark.xfail + def test_indexer_configuration_metadata_get_missing_context(self): + pass + + @pytest.mark.xfail + def test_indexer_configuration_metadata_get(self): + pass + + +class PropIndexerTestInMemoryStorage(CommonPropBasedTestStorage, TestCase): + def setUp(self): + super().setUp() + self.storage = IndexerStorage() + + @pytest.mark.xfail + def test_generate_content_mimetype_get_range_limit_none(self): + pass + + @pytest.mark.xfail + def test_generate_content_mimetype_get_range_no_limit(self, mimetypes): + pass + + @pytest.mark.xfail + def test_generate_content_mimetype_get_range_limit(self, mimetypes): + pass + + @pytest.mark.xfail + def test_generate_content_fossology_license_get_range_limit_none(self): + pass + + @pytest.mark.xfail + def 
test_generate_content_fossology_license_get_range_no_limit(self): + pass + + @pytest.mark.xfail + def test_generate_content_fossology_license_get_range_no_limit_with_filter( + self): + pass + + @pytest.mark.xfail + def test_generate_fossology_license_get_range_limit(self): + pass diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -20,7 +20,7 @@ @pytest.mark.db -class BaseTestStorage(SingleDbTestFixture): +class BasePgTestStorage(SingleDbTestFixture): """Base test class for most indexer tests. It adds support for Storage testing to the SingleDbTestFixture class. @@ -80,8 +80,7 @@ db.conn.commit() -@pytest.mark.db -class CommonTestStorage(BaseTestStorage): +class CommonTestStorage: """Base class for Indexer Storage testing. """ @@ -1707,10 +1706,11 @@ 'tool_configuration': {"command_line": "nomossa "}, } + self.storage.indexer_configuration_add([tool]) actual_tool = self.storage.indexer_configuration_get(tool) expected_tool = tool.copy() - expected_tool['id'] = 1 + del actual_tool['id'] self.assertEqual(expected_tool, actual_tool) @@ -1732,6 +1732,7 @@ 'tool_configuration': {"type": "local", "context": "NpmMapping"}, } + self.storage.indexer_configuration_add([tool]) actual_tool = self.storage.indexer_configuration_get(tool) expected_tool = tool.copy() @@ -1741,7 +1742,7 @@ @pytest.mark.property_based -class PropBasedTestStorage(BaseTestStorage, unittest.TestCase): +class CommonPropBasedTestStorage(BasePgTestStorage): """Properties-based tests """ @@ -1957,7 +1958,9 @@ self.assertEqual(expected_fossology_licenses2, actual_ids2) -class IndexerTestStorage(CommonTestStorage, unittest.TestCase): +@pytest.mark.db +class IndexerTestStorage(CommonTestStorage, BasePgTestStorage, + unittest.TestCase): """Running the tests locally. 
For the client api tests (remote storage), see @@ -1966,3 +1969,9 @@ """ pass + + +@pytest.mark.property_based +class PropIndexerTestStorage(CommonPropBasedTestStorage, BasePgTestStorage, + unittest.TestCase): + pass diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py --- a/swh/indexer/tests/test_ctags.py +++ b/swh/indexer/tests/test_ctags.py @@ -4,7 +4,6 @@ # See top-level LICENSE file for more information import unittest -import logging from unittest.mock import patch from swh.indexer.ctags import ( @@ -14,7 +13,7 @@ from swh.indexer.tests.test_utils import ( BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest, CommonIndexerWithErrorsTest, CommonIndexerNoTool, - SHA1_TO_CTAGS, NoDiskIndexer + SHA1_TO_CTAGS, NoDiskIndexer, BASE_TEST_CONFIG ) @@ -80,8 +79,9 @@ """Specific language whose configuration is enough to satisfy the indexing tests. """ - def prepare(self): - self.config = { + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, 'tools': { 'name': 'universal-ctags', 'version': '~git7859817b', @@ -95,16 +95,15 @@ 'python': 'python', 'haskell': 'haskell', 'bar': 'bar', - } + }, + 'workdir': '/nowhere', } + + def prepare(self): + super().prepare() self.idx_storage = BasicMockIndexerStorage() - self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.tool_config = self.config['tools']['configuration'] - self.max_content_size = self.tool_config['max_content_size'] - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] - self.language_map = self.config['languages'] class TestCtagsIndexer(CommonContentIndexerTest, unittest.TestCase): diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py --- a/swh/indexer/tests/test_fossology_license.py +++ b/swh/indexer/tests/test_fossology_license.py @@ -16,7 +16,8 @@ from swh.indexer.tests.test_utils import ( MockObjStorage, BasicMockStorage, 
BasicMockIndexerStorage, SHA1_TO_LICENSES, CommonContentIndexerTest, CommonContentIndexerRangeTest, - CommonIndexerWithErrorsTest, CommonIndexerNoTool, NoDiskIndexer + CommonIndexerWithErrorsTest, CommonIndexerNoTool, NoDiskIndexer, + BASE_TEST_CONFIG ) @@ -64,8 +65,10 @@ the indexing checks. """ - def prepare(self): - self.config = { + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, + 'workdir': '/nowhere', 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', @@ -74,11 +77,12 @@ }, }, } + + def prepare(self): + super().prepare() self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase): @@ -120,8 +124,10 @@ """Testing the range indexer on fossology license. """ - def prepare(self): - self.config = { + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, + 'workdir': '/nowhere', 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', @@ -131,15 +137,15 @@ }, 'write_batch_size': 100, } + + def prepare(self): + super().prepare() self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') # this hardcodes some contents, will use this to setup the storage self.objstorage = MockObjStorage() - # sync objstorage and storage contents = [{'sha1': c_id} for c_id in self.objstorage] self.storage = BasicMockStorage(contents) - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] class TestFossologyLicenseRangeIndexer( diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py --- a/swh/indexer/tests/test_language.py +++ b/swh/indexer/tests/test_language.py @@ -4,12 +4,11 @@ # See top-level LICENSE file for more information import unittest -import logging from swh.indexer import language from 
swh.indexer.language import ContentLanguageIndexer from swh.indexer.tests.test_utils import ( BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest, - CommonIndexerWithErrorsTest, CommonIndexerNoTool + CommonIndexerWithErrorsTest, CommonIndexerNoTool, BASE_TEST_CONFIG ) @@ -17,8 +16,9 @@ """Specific language whose configuration is enough to satisfy the indexing tests. """ - def prepare(self): - self.config = { + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, 'tools': { 'name': 'pygments', 'version': '2.0.1+dfsg-1.1+deb8u1', @@ -29,13 +29,12 @@ }, } } + + def prepare(self): + super().prepare() self.idx_storage = BasicMockIndexerStorage() - self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.tool_config = self.config['tools']['configuration'] - self.max_content_size = self.tool_config['max_content_size'] - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] class Language(unittest.TestCase): diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -4,7 +4,6 @@ # See top-level LICENSE file for more information import unittest -import logging from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS from swh.indexer.metadata_detector import detect_metadata @@ -16,18 +15,20 @@ from swh.model.hashutil import hash_to_bytes +from .test_utils import BASE_TEST_CONFIG + class ContentMetadataTestIndexer(ContentMetadataIndexer): """Specific Metadata whose configuration is enough to satisfy the indexing tests. """ + def parse_config_file(self, *args, **kwargs): + assert False, 'should not be called; the rev indexer configures it.' 
+ def prepare(self): - self.idx_storage = MockIndexerStorage() - self.log = logging.getLogger('swh.indexer') + super().prepare() self.objstorage = MockObjStorage() - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] - self.results = [] + self.idx_storage = MockIndexerStorage() class RevisionMetadataTestIndexer(RevisionMetadataIndexer): @@ -37,11 +38,9 @@ ContentMetadataIndexer = ContentMetadataTestIndexer - def prepare(self): - self.config = { - 'storage': {}, - 'objstorage': {}, - 'indexer_storage': {}, + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, 'tools': { 'name': 'swh-metadata-detector', 'version': '0.0.2', @@ -51,12 +50,13 @@ } } } + + def prepare(self): + super().prepare() self.storage = MockStorage() self.idx_storage = MockIndexerStorage() - self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] + self.tools = list(self.register_tools(self.config['tools'])) class Metadata(unittest.TestCase): @@ -207,14 +207,13 @@ # this metadata indexer computes only metadata for package.json # in npm context with a hard mapping metadata_indexer = ContentMetadataTestIndexer( - tool=self.content_tool, config={}) + tool=self.content_tool, config=BASE_TEST_CONFIG.copy()) # when metadata_indexer.run(sha1s, policy_update='ignore-dups') results = metadata_indexer.idx_storage.added_data expected_results = [('content_metadata', False, [{ - 'indexer_configuration_id': 30, 'translated_metadata': { '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', 'type': 'SoftwareSourceCode', @@ -226,7 +225,6 @@ }, 'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' }, { - 'indexer_configuration_id': 30, 'translated_metadata': { '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', 'type': 'SoftwareSourceCode', @@ -254,11 +252,15 @@ }, 'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' }, { - 
'indexer_configuration_id': 30, 'translated_metadata': None, 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069' }])] + for result in results: + metadata = result[2] + for item in metadata: + del item['indexer_configuration_id'] + # The assertion below returns False sometimes because of nested lists self.assertEqual(expected_results, results) @@ -470,7 +472,12 @@ 'name': 'yarn-parser', 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], }, - 'indexer_configuration_id': 7 }])] + + for result in results: + metadata = result[2] + for item in metadata: + del item['indexer_configuration_id'] + # then self.assertEqual(expected_results, results) diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py --- a/swh/indexer/tests/test_mimetype.py +++ b/swh/indexer/tests/test_mimetype.py @@ -15,7 +15,8 @@ from swh.indexer.tests.test_utils import ( MockObjStorage, BasicMockStorage, BasicMockIndexerStorage, CommonContentIndexerTest, CommonContentIndexerRangeTest, - CommonIndexerWithErrorsTest, CommonIndexerNoTool + CommonIndexerWithErrorsTest, CommonIndexerNoTool, + BASE_TEST_CONFIG ) @@ -47,8 +48,9 @@ satisfy the indexing tests. """ - def prepare(self): - self.config = { + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', @@ -58,11 +60,12 @@ }, }, } + + def prepare(self): + super().prepare() self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase): @@ -106,8 +109,9 @@ indexing tests. 
""" - def prepare(self): - self.config = { + def parse_config_file(self, *args, **kwargs): + return { + **BASE_TEST_CONFIG, 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', @@ -118,15 +122,15 @@ }, 'write_batch_size': 100, } + + def prepare(self): + super().prepare() self.idx_storage = BasicMockIndexerStorage() - self.log = logging.getLogger('swh.indexer') # this hardcodes some contents, will use this to setup the storage self.objstorage = MockObjStorage() # sync objstorage and storage contents = [{'sha1': c_id} for c_id in self.objstorage] self.storage = BasicMockStorage(contents) - self.tools = self.register_tools(self.config['tools']) - self.tool = self.tools[0] class TestMimetypeRangeIndexer( diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -8,11 +8,11 @@ import unittest from celery import task -from swh.indexer.metadata import OriginMetadataIndexer +from swh.indexer.metadata import OriginMetadataIndexer, \ + RevisionMetadataIndexer, ContentMetadataIndexer from swh.indexer.tests.test_utils import MockObjStorage, MockStorage from swh.indexer.tests.test_utils import MockIndexerStorage from swh.indexer.tests.test_origin_head import OriginHeadTestIndexer -from swh.indexer.tests.test_metadata import RevisionMetadataTestIndexer from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture @@ -38,6 +38,48 @@ self.results = [] +class ContentMetadataTestIndexer(ContentMetadataIndexer): + """Specific Metadata whose configuration is enough to satisfy the + indexing tests. 
+ """ + def prepare(self): + self.idx_storage = MockIndexerStorage() + self.log = logging.getLogger('swh.indexer') + self.objstorage = MockObjStorage() + self.tools = self.register_tools(self.config['tools']) + self.tool = self.tools[0] + self.results = [] + + +class RevisionMetadataTestIndexer(RevisionMetadataIndexer): + """Specific indexer whose configuration is enough to satisfy the + indexing tests. + """ + + ContentMetadataIndexer = ContentMetadataTestIndexer + + def prepare(self): + self.config = { + 'storage': {}, + 'objstorage': {}, + 'indexer_storage': {}, + 'tools': { + 'name': 'swh-metadata-detector', + 'version': '0.0.2', + 'configuration': { + 'type': 'local', + 'context': 'NpmMapping' + } + } + } + self.storage = MockStorage() + self.idx_storage = MockIndexerStorage() + self.log = logging.getLogger('swh.indexer') + self.objstorage = MockObjStorage() + self.tools = self.register_tools(self.config['tools']) + self.tool = self.tools[0] + + @task def revision_metadata_test_task(*args, **kwargs): indexer = RevisionMetadataTestIndexer() diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -7,6 +7,28 @@ from swh.model import hashutil from swh.model.hashutil import hash_to_bytes +from swh.indexer.storage import INDEXER_CFG_KEY + +BASE_TEST_CONFIG = { + 'storage': { + 'cls': 'remote', + 'args': { + 'url': 'http://nowhere/', + }, + }, + 'objstorage': { + 'cls': 'remote', + 'args': { + 'url': 'http://nowhere2/', + }, + }, + INDEXER_CFG_KEY: { + 'cls': 'memory', + 'args': { + }, + }, +} + ORIGINS = [ { 'id': 52189575,