diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py
--- a/swh/indexer/ctags.py
+++ b/swh/indexer/ctags.py
@@ -92,6 +92,12 @@
             } for sha1 in ids
         ))
 
+    def compute_ctags(self, path, lang):
+        """Compute ctags on file at path with language lang.
+
+        """
+        return run_ctags(path, lang=lang)
+
     def index(self, id, data):
         """Index sha1s' content and store result.
 
diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_ctags.py
@@ -0,0 +1,104 @@
+# Copyright (C) 2017-2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import logging
+from swh.indexer.ctags import CtagsIndexer
+from swh.indexer.tests.test_utils import (
+    BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest,
+    CommonIndexerWithErrorsTest, CommonIndexerNoTool,
+    SHA1_TO_CTAGS, NoDiskIndexer
+)
+
+
+class InjectCtagsIndexer:
+    """Override ctags computations.
+
+    """
+    def compute_ctags(self, path, lang):
+        """Inject fake ctags given path (sha1 identifier).
+
+        """
+        return [
+            {'lang': lang, **ctag}
+            for ctag in SHA1_TO_CTAGS.get(path, [])
+        ]
+
+
+class CtagsIndexerTest(NoDiskIndexer, InjectCtagsIndexer, CtagsIndexer):
+    """Specific ctags indexer whose configuration is enough to satisfy the
+       indexing tests.
+    """
+    def prepare(self):
+        self.config = {
+            'tools': {
+                'name': 'universal-ctags',
+                'version': '~git7859817b',
+                'configuration': {
+                    'command_line': '''ctags --fields=+lnz --sort=no '''
+                                    ''' --links=no ''',
+                    'max_content_size': 1000,
+                },
+            },
+            'languages': {
+                'python': 'python',
+                'haskell': 'haskell',
+                'bar': 'bar',
+            }
+        }
+        self.idx_storage = BasicMockIndexerStorage()
+        self.log = logging.getLogger('swh.indexer')
+        self.objstorage = MockObjStorage()
+        self.tool_config = self.config['tools']['configuration']
+        self.max_content_size = self.tool_config['max_content_size']
+        self.tools = self.register_tools(self.config['tools'])
+        self.tool = self.tools[0]
+        self.language_map = self.config['languages']
+
+
+class TestCtagsIndexer(CommonContentIndexerTest, unittest.TestCase):
+    """Ctags indexer test scenarios:
+
+    - Known sha1s in the input list have their data indexed
+    - Unknown sha1 in the input list are not indexed
+
+    """
+    def setUp(self):
+        self.indexer = CtagsIndexerTest()
+
+        # Prepare test input
+        self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
+        self.id1 = 'd4c647f0fc257591cc9ba1722484229780d1c607'
+        self.id2 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
+
+        tool_id = self.indexer.tool['id']
+        self.expected_results = {
+            self.id0: {
+                'id': self.id0,
+                'indexer_configuration_id': tool_id,
+                'ctags': SHA1_TO_CTAGS[self.id0],
+            },
+            self.id1: {
+                'id': self.id1,
+                'indexer_configuration_id': tool_id,
+                'ctags': SHA1_TO_CTAGS[self.id1],
+            },
+            self.id2: {
+                'id': self.id2,
+                'indexer_configuration_id': tool_id,
+                'ctags': SHA1_TO_CTAGS[self.id2],
+            }
+        }
+
+
+class CtagsIndexerUnknownToolTestStorage(
+        CommonIndexerNoTool, CtagsIndexerTest):
+    """Ctags indexer with wrong configuration"""
+
+
+class TestCtagsIndexersErrors(
+        CommonIndexerWithErrorsTest, unittest.TestCase):
+    """Test the indexer raise the right errors when wrongly initialized"""
+    Indexer = CtagsIndexerUnknownToolTestStorage
diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
--- a/swh/indexer/tests/test_fossology_license.py
+++ b/swh/indexer/tests/test_fossology_license.py
@@ -12,23 +12,11 @@
 
 from swh.indexer.tests.test_utils import (
     MockObjStorage, BasicMockStorage, BasicMockIndexerStorage,
-    SHA1_TO_LICENSES, CommonContentIndexerTest, CommonContentIndexerRangeTest
+    SHA1_TO_LICENSES, CommonContentIndexerTest, CommonContentIndexerRangeTest,
+    CommonIndexerWithErrorsTest, CommonIndexerNoTool, NoDiskIndexer
 )
 
 
-class NoDiskIndexer:
-    """Mixin to override the DiskIndexer behavior avoiding side-effects in
-    tests.
-
-    """
-
-    def write_to_temp(self, filename, data):  # noop
-        return filename
-
-    def cleanup(self, content_path):  # noop
-        return None
-
-
 class InjectLicenseIndexer:
     """Override license computations.
 
@@ -65,24 +53,6 @@
         self.tool = self.tools[0]
 
 
-class FossologyLicenseIndexerUnknownToolTestStorage(
-        FossologyLicenseTestIndexer):
-    """Specific fossology license indexer whose configuration is not
-    enough to satisfy the indexing checks
-
-    """
-    def prepare(self):
-        super().prepare()
-        self.tools = None
-
-
-class TestFossologyLicenseIndexerWithErrors(unittest.TestCase):
-    def test_wrong_unknown_configuration_tool(self):
-        """Indexer with unknown configuration tool should fail the check"""
-        with self.assertRaisesRegex(ValueError, 'Tools None is unknown'):
-            FossologyLicenseIndexerUnknownToolTestStorage()
-
-
 class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Language indexer test scenarios:
 
@@ -181,3 +151,20 @@
                 'licenses': SHA1_TO_LICENSES[self.id2]
             }
         }
+
+
+class FossologyLicenseIndexerUnknownToolTestStorage(
+        CommonIndexerNoTool, FossologyLicenseTestIndexer):
+    """Fossology license indexer with wrong configuration"""
+
+
+class FossologyLicenseRangeIndexerUnknownToolTestStorage(
+        CommonIndexerNoTool, FossologyLicenseRangeIndexerTest):
+    """Fossology license range indexer with wrong configuration"""
+
+
+class TestFossologyLicenseIndexersErrors(
+        CommonIndexerWithErrorsTest, unittest.TestCase):
+    """Test the indexer raise the right errors when wrongly initialized"""
+    Indexer = FossologyLicenseIndexerUnknownToolTestStorage
+    RangeIndexer = FossologyLicenseRangeIndexerUnknownToolTestStorage
diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
--- a/swh/indexer/tests/test_language.py
+++ b/swh/indexer/tests/test_language.py
@@ -8,7 +8,8 @@
 from swh.indexer import language
 from swh.indexer.language import ContentLanguageIndexer
 from swh.indexer.tests.test_utils import (
-    BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest
+    BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest,
+    CommonIndexerWithErrorsTest, CommonIndexerNoTool
 )
 
 
@@ -86,3 +87,14 @@
                 'lang': 'text-only'
             }
         }
+
+
+class LanguageIndexerUnknownToolTestStorage(
+        CommonIndexerNoTool, LanguageTestIndexer):
+    """Language indexer with wrong configuration"""
+
+
+class TestLanguageIndexersErrors(
+        CommonIndexerWithErrorsTest, unittest.TestCase):
+    """Test the indexer raise the right errors when wrongly initialized"""
+    Indexer = LanguageIndexerUnknownToolTestStorage
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -12,7 +12,8 @@
 
 from swh.indexer.tests.test_utils import (
     MockObjStorage, BasicMockStorage, BasicMockIndexerStorage,
-    CommonContentIndexerTest, CommonContentIndexerRangeTest
+    CommonContentIndexerTest, CommonContentIndexerRangeTest,
+    CommonIndexerWithErrorsTest, CommonIndexerNoTool
 )
 
 
@@ -39,23 +40,6 @@
         self.tool = self.tools[0]
 
 
-class MimetypeIndexerUnknownToolTestStorage(MimetypeTestIndexer):
-    """Specific mimetype whose configuration is not enough to satisfy the
-    indexing checks.
-
-    """
-    def prepare(self):
-        super().prepare()
-        self.tools = None
-
-
-class TestMimetypeIndexerWithErrors(unittest.TestCase):
-    def test_wrong_unknown_configuration_tool(self):
-        """Indexer with unknown configuration tool should fail the check"""
-        with self.assertRaisesRegex(ValueError, 'Tools None is unknown'):
-            MimetypeIndexerUnknownToolTestStorage()
-
-
 class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Mimetype indexer test scenarios:
 
@@ -157,3 +141,20 @@
                 'indexer_configuration_id': 10,
                 'mimetype': b'text/plain'}
         }
+
+
+class MimetypeIndexerUnknownToolTestStorage(
+        CommonIndexerNoTool, MimetypeTestIndexer):
+    """Mimetype indexer with wrong configuration"""
+
+
+class MimetypeRangeIndexerUnknownToolTestStorage(
+        CommonIndexerNoTool, MimetypeRangeIndexerTest):
+    """Mimetype range indexer with wrong configuration"""
+
+
+class TestMimetypeIndexersErrors(
+        CommonIndexerWithErrorsTest, unittest.TestCase):
+    """Test the indexer raise the right errors when wrongly initialized"""
+    Indexer = MimetypeIndexerUnknownToolTestStorage
+    RangeIndexer = MimetypeRangeIndexerUnknownToolTestStorage
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -3,7 +3,6 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-
 from swh.objstorage.exc import ObjNotFoundError
 from swh.model import hashutil
 
@@ -134,6 +133,28 @@
 }
 
 
+SHA1_TO_CTAGS = {
+    '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': [{
+        'name': 'foo',
+        'kind': 'str',
+        'line': 10,
+        'lang': 'bar',
+    }],
+    'd4c647f0fc257591cc9ba1722484229780d1c607': [{
+        'name': 'let',
+        'kind': 'int',
+        'line': 100,
+        'lang': 'haskell',
+    }],
+    '688a5ef812c53907562fe379d4b3851e69c7cb15': [{
+        'name': 'symbol',
+        'kind': 'float',
+        'line': 99,
+        'lang': 'python',
+    }],
+}
+
+
 class MockObjStorage:
     """Mock an swh-objstorage objstorage with predefined contents.
 
@@ -470,6 +491,9 @@
     def content_language_add(self, data, conflict_update=None):
         self._internal_add(data, conflict_update=conflict_update)
 
+    def content_ctags_add(self, data, conflict_update=None):
+        self._internal_add(data, conflict_update=conflict_update)
+
     def _internal_get_range(self, start, end, indexer_configuration_id,
                             limit=1000):
         """Same logic as _internal_add, we retrieve indexed data given an
@@ -518,6 +542,32 @@
         }]
 
 
+class CommonIndexerNoTool:
+    """Mixin to wrongly initialize content indexer"""
+    def prepare(self):
+        super().prepare()
+        self.tools = None
+
+
+class CommonIndexerWithErrorsTest:
+    """Test indexer configuration checks.
+
+    """
+    Indexer = None
+    RangeIndexer = None
+
+    def test_wrong_unknown_configuration_tool(self):
+        """Indexer with unknown configuration tool fails check"""
+        with self.assertRaisesRegex(ValueError, 'Tools None is unknown'):
+            self.Indexer()
+
+    def test_wrong_unknown_configuration_tool_range(self):
+        """Range Indexer with unknown configuration tool fails check"""
+        if self.RangeIndexer is not None:
+            with self.assertRaisesRegex(ValueError, 'Tools None is unknown'):
+                self.RangeIndexer()
+
+
 class CommonContentIndexerTest:
     def assert_results_ok(self, actual_results, expected_results=None):
         if expected_results is None:
@@ -649,3 +699,16 @@
 
         # then
         self.assertFalse(actual_results)
+
+
+class NoDiskIndexer:
+    """Mixin to override the DiskIndexer behavior avoiding side-effects in
+    tests.
+
+    """
+
+    def write_to_temp(self, filename, data):  # noop
+        return filename
+
+    def cleanup(self, content_path):  # noop
+        return None