diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py index 8131c36..10ad15f 100644 --- a/swh/indexer/tests/test_fossology_license.py +++ b/swh/indexer/tests/test_fossology_license.py @@ -1,170 +1,172 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging from swh.indexer.fossology_license import ( ContentFossologyLicenseIndexer, FossologyLicenseRangeIndexer ) from swh.indexer.tests.test_utils import ( MockObjStorage, BasicMockStorage, BasicMockIndexerStorage, SHA1_TO_LICENSES, CommonContentIndexerTest, CommonContentIndexerRangeTest, CommonIndexerWithErrorsTest, CommonIndexerNoTool, NoDiskIndexer ) class InjectLicenseIndexer: """Override license computations. """ def compute_license(self, path, log=None): """path is the content identifier """ return { 'licenses': SHA1_TO_LICENSES.get(path) } class FossologyLicenseTestIndexer( NoDiskIndexer, InjectLicenseIndexer, ContentFossologyLicenseIndexer): """Specific fossology license whose configuration is enough to satisfy the indexing checks. """ def prepare(self): self.config = { 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', 'configuration': { 'command_line': 'nomossa ', }, }, } self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase): """Language indexer test scenarios: - Known sha1s in the input list have their data indexed - Unknown sha1 in the input list are not indexed """ def setUp(self): self.indexer = FossologyLicenseTestIndexer() self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15' self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' # empty content + tool_id = self.indexer.tool['id'] # then self.expected_results = { self.id0: { 'id': self.id0, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'licenses': SHA1_TO_LICENSES[self.id0], }, self.id1: { 'id': self.id1, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'licenses': SHA1_TO_LICENSES[self.id1], }, self.id2: { 'id': self.id2, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'licenses': SHA1_TO_LICENSES[self.id2], } } class FossologyLicenseRangeIndexerTest( NoDiskIndexer, InjectLicenseIndexer, FossologyLicenseRangeIndexer): """Testing the range indexer on fossology license. """ def prepare(self): self.config = { 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', 'configuration': { 'command_line': 'nomossa ', }, }, 'write_batch_size': 100, } self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') # this hardcodes some contents, will use this to setup the storage self.objstorage = MockObjStorage() # sync objstorage and storage contents = [{'sha1': c_id} for c_id in self.objstorage] self.storage = BasicMockStorage(contents) self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class TestFossologyLicenseRangeIndexer( CommonContentIndexerRangeTest, unittest.TestCase): """Range Fossology License Indexer tests. - new data within range are indexed - no data outside a range are indexed - with filtering existing indexed data prior to compute new index - without filtering existing indexed data prior to compute new index """ def setUp(self): self.indexer = FossologyLicenseRangeIndexerTest() # will play along with the objstorage's mocked contents for now self.contents = sorted(self.indexer.objstorage) # FIXME: leverage swh.objstorage.in_memory_storage's # InMemoryObjStorage, swh.storage.tests's gen_contents, and # hypothesis to generate data to actually run indexer on those self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = '02fb2c89e14f7fab46701478c83779c7beb7b069' self.id2 = '103bc087db1d26afc3a0283f38663d081e9b01e6' + tool_id = self.indexer.tool['id'] self.expected_results = { self.id0: { 'id': self.id0, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'licenses': SHA1_TO_LICENSES[self.id0] }, self.id1: { 'id': self.id1, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'licenses': SHA1_TO_LICENSES[self.id1] }, self.id2: { 'id': self.id2, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'licenses': SHA1_TO_LICENSES[self.id2] } } class FossologyLicenseIndexerUnknownToolTestStorage( CommonIndexerNoTool, FossologyLicenseTestIndexer): """Fossology license indexer with wrong configuration""" class FossologyLicenseRangeIndexerUnknownToolTestStorage( CommonIndexerNoTool, FossologyLicenseRangeIndexerTest): """Fossology license range indexer with wrong configuration""" class TestFossologyLicenseIndexersErrors( CommonIndexerWithErrorsTest, unittest.TestCase): """Test the indexer raise the right errors when wrongly initialized""" Indexer = FossologyLicenseIndexerUnknownToolTestStorage RangeIndexer = FossologyLicenseRangeIndexerUnknownToolTestStorage diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py index da03035..ac182ad 100644 --- a/swh/indexer/tests/test_language.py +++ b/swh/indexer/tests/test_language.py @@ -1,98 +1,99 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging from swh.indexer import language from swh.indexer.language import ContentLanguageIndexer from swh.indexer.tests.test_utils import ( BasicMockIndexerStorage, MockObjStorage, CommonContentIndexerTest, CommonIndexerWithErrorsTest, CommonIndexerNoTool ) class LanguageTestIndexer(ContentLanguageIndexer): """Specific language whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'tools': { 'name': 'pygments', 'version': '2.0.1+dfsg-1.1+deb8u1', 'configuration': { 'type': 'library', 'debian-package': 'python3-pygments', 'max_content_size': 10240, }, } } self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.tool_config = self.config['tools']['configuration'] self.max_content_size = self.tool_config['max_content_size'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class Language(unittest.TestCase): """Tests pygments tool for language detection """ def test_compute_language_none(self): # given self.content = "" self.declared_language = { 'lang': None } # when result = language.compute_language(self.content) # then self.assertEqual(self.declared_language, result) class TestLanguageIndexer(CommonContentIndexerTest, unittest.TestCase): """Language indexer test scenarios: - Known sha1s in the input list have their data indexed - Unknown sha1 in the input list are not indexed """ def setUp(self): self.indexer = LanguageTestIndexer() self.id0 = '02fb2c89e14f7fab46701478c83779c7beb7b069' self.id1 = '103bc087db1d26afc3a0283f38663d081e9b01e6' self.id2 = 'd4c647f0fc257591cc9ba1722484229780d1c607' + tool_id = self.indexer.tool['id'] self.expected_results = { self.id0: { 'id': self.id0, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'lang': 'python', }, self.id1: { 'id': self.id1, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'lang': 'c' }, self.id2: { 'id': self.id2, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'lang': 'text-only' } } class LanguageIndexerUnknownToolTestStorage( CommonIndexerNoTool, LanguageTestIndexer): """Fossology license indexer with wrong configuration""" class TestLanguageIndexersErrors( CommonIndexerWithErrorsTest, unittest.TestCase): """Test the indexer raise the right errors when wrongly initialized""" Indexer = LanguageIndexerUnknownToolTestStorage diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py index cfdcd96..70d2e1d 100644 --- a/swh/indexer/tests/test_mimetype.py +++ b/swh/indexer/tests/test_mimetype.py @@ -1,160 +1,163 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging from swh.indexer.mimetype import ( ContentMimetypeIndexer, MimetypeRangeIndexer ) from swh.indexer.tests.test_utils import ( MockObjStorage, BasicMockStorage, BasicMockIndexerStorage, CommonContentIndexerTest, CommonContentIndexerRangeTest, CommonIndexerWithErrorsTest, CommonIndexerNoTool ) class MimetypeTestIndexer(ContentMimetypeIndexer): """Specific mimetype indexer instance whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" }, }, } self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase): """Mimetype indexer test scenarios: - Known sha1s in the input list have their data indexed - Unknown sha1 in the input list are not indexed """ def setUp(self): self.indexer = MimetypeTestIndexer() self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15' self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' + tool_id = self.indexer.tool['id'] self.expected_results = { self.id0: { 'id': self.id0, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, self.id1: { 'id': self.id1, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, self.id2: { 'id': self.id2, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'mimetype': b'application/x-empty', 'encoding': b'binary', } } class MimetypeRangeIndexerTest(MimetypeRangeIndexer): """Specific mimetype whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" }, }, 'write_batch_size': 100, } self.idx_storage = BasicMockIndexerStorage() self.log = logging.getLogger('swh.indexer') # this hardcodes some contents, will use this to setup the storage self.objstorage = MockObjStorage() # sync objstorage and storage contents = [{'sha1': c_id} for c_id in self.objstorage] self.storage = BasicMockStorage(contents) self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class TestMimetypeRangeIndexer( CommonContentIndexerRangeTest, unittest.TestCase): """Range Mimetype Indexer tests. - new data within range are indexed - no data outside a range are indexed - with filtering existing indexed data prior to compute new index - without filtering existing indexed data prior to compute new index """ def setUp(self): self.indexer = MimetypeRangeIndexerTest() # will play along with the objstorage's mocked contents for now self.contents = sorted(self.indexer.objstorage) # FIXME: leverage swh.objstorage.in_memory_storage's # InMemoryObjStorage, swh.storage.tests's gen_contents, and # hypothesis to generate data to actually run indexer on those self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = '02fb2c89e14f7fab46701478c83779c7beb7b069' self.id2 = '103bc087db1d26afc3a0283f38663d081e9b01e6' + tool_id = self.indexer.tool['id'] + self.expected_results = { self.id0: { 'encoding': b'us-ascii', 'id': self.id0, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'mimetype': b'text/plain'}, self.id1: { 'encoding': b'us-ascii', 'id': self.id1, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'mimetype': b'text/x-python'}, self.id2: { 'encoding': b'us-ascii', 'id': self.id2, - 'indexer_configuration_id': 10, + 'indexer_configuration_id': tool_id, 'mimetype': b'text/plain'} } class MimetypeIndexerUnknownToolTestStorage( CommonIndexerNoTool, MimetypeTestIndexer): """Fossology license indexer with wrong configuration""" class MimetypeRangeIndexerUnknownToolTestStorage( CommonIndexerNoTool, MimetypeRangeIndexerTest): """Fossology license range indexer with wrong configuration""" class TestMimetypeIndexersErrors( CommonIndexerWithErrorsTest, unittest.TestCase): """Test the indexer raise the right errors when wrongly initialized""" Indexer = MimetypeIndexerUnknownToolTestStorage RangeIndexer = MimetypeRangeIndexerUnknownToolTestStorage