diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
new file mode 100644
index 0000000..84aca34
--- /dev/null
+++ b/swh/indexer/tests/test_language.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2015-2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import logging
+from nose.tools import istest
+from swh.indexer import language
+from swh.indexer.language import ContentLanguageIndexer
+from swh.indexer.tests.test_utils import MockObjStorage
+
+
+class MockStorage():
+    """Mock storage to simplify reading indexers' outputs.
+    """
+    def content_language_add(self, languages, conflict_update=None):
+        self.state = languages
+        self.conflict_update = conflict_update
+
+    def indexer_configuration_get(self, tool):
+        return {
+            'id': 20,
+        }
+
+
+class TestLanguageIndexer(ContentLanguageIndexer):
+    """Specific language indexer whose configuration is enough to satisfy the
+    indexing tests.
+    """
+    def prepare(self):
+        self.config = {
+            'destination_queue': None,
+            'rescheduling_task': None,
+            'tools': {
+                'name': 'pygments',
+                'version': '2.0.1+dfsg-1.1+deb8u1',
+                'configuration': {
+                    'type': 'library',
+                    'debian-package': 'python3-pygments',
+                    'max_content_size': 10240,
+                },
+            }
+        }
+        self.storage = MockStorage()
+        self.log = logging.getLogger('swh.indexer')
+        self.objstorage = MockObjStorage()
+        self.task_destination = None
+        self.rescheduling_task = self.config['rescheduling_task']
+        self.tool_config = self.config['tools']['configuration']
+        self.max_content_size = self.tool_config['max_content_size']
+        self.tools = self.retrieve_tools_information()
+
+
+class Language(unittest.TestCase):
+    """
+    Tests pygments tool for language detection
+    """
+    def setUp(self):
+        self.maxDiff = None
+
+    @istest
+    def test_compute_language_none(self):
+        # given
+        self.content = ""
+        self.declared_language = {
+            'lang': None
+        }
+        # when
+        result = language.compute_language(self.content)
+        # then
+        self.assertEqual(self.declared_language, result)
+
+    @istest
+    def test_index_content_language_python(self):
+        # given
+        # testing python
+        sha1s = ['02fb2c89e14f7fab46701478c83779c7beb7b069']
+        lang_indexer = TestLanguageIndexer()
+
+        # when
+        lang_indexer.run(sha1s, policy_update='ignore-dups')
+        results = lang_indexer.storage.state
+
+        expected_results = [{
+            'id': '02fb2c89e14f7fab46701478c83779c7beb7b069',
+            'indexer_configuration_id': 20,
+            'lang': 'python'
+        }]
+        # then
+        self.assertEqual(expected_results, results)
+
+    @istest
+    def test_index_content_language_c(self):
+        # given
+        # testing c
+        sha1s = ['103bc087db1d26afc3a0283f38663d081e9b01e6']
+        lang_indexer = TestLanguageIndexer()
+
+        # when
+        lang_indexer.run(sha1s, policy_update='ignore-dups')
+        results = lang_indexer.storage.state
+
+        expected_results = [{
+            'id': '103bc087db1d26afc3a0283f38663d081e9b01e6',
+            'indexer_configuration_id': 20,
+            'lang': 'c'
+        }]
+
+        # then
+        self.assertEqual('c', results[0]['lang'])
+        self.assertEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
index fd8fa1d..e72fec9 100644
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -1,170 +1,148 @@
 # Copyright (C) 2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 import logging
 from nose.tools import istest
 
 from swh.indexer.mimetype import ContentMimetypeIndexer
-from swh.objstorage.exc import ObjNotFoundError
+
+from swh.indexer.tests.test_utils import MockObjStorage
 
 
 class MockStorage():
     """Mock storage to simplify reading indexers' outputs.
 
     """
     def content_mimetype_add(self, mimetypes, conflict_update=None):
         self.state = mimetypes
         self.conflict_update = conflict_update
 
     def indexer_configuration_get(self, tool):
         return {
             'id': 10,
         }
 
 
-class MockStorageWrongConfiguration():
-    def indexer_configuration_get(self, tool):
-        return None
-
-
-class MockObjStorage():
-    """Mock objstorage with predefined contents.
-
-    """
-    def __init__(self):
-        self.data = {
-            '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
-            '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
-            '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
-        }
-
-    def get(self, sha1):
-        raw_content = self.data.get(sha1)
-        if not raw_content:
-            raise ObjNotFoundError()
-        return raw_content
-
-
 class TestMimetypeIndexer(ContentMimetypeIndexer):
     """Specific mimetype whose configuration is enough to satisfy the
     indexing tests.
 
     """
     def prepare(self):
         self.config = {
             'destination_queue': None,
             'rescheduling_task': None,
             'tools': {
                 'name': 'file',
                 'version': '5.22',
                 'configuration': 'file --mime ',
             },
         }
         self.storage = MockStorage()
         self.log = logging.getLogger('swh.indexer')
         self.objstorage = MockObjStorage()
         self.task_destination = None
         self.rescheduling_task = self.config['rescheduling_task']
         self.destination_queue = self.config['destination_queue']
         self.tools = self.retrieve_tools_information()
 
 
 class TestMimetypeIndexerWrongStorage(TestMimetypeIndexer):
     """Specific mimetype whose configuration is not enough to satisfy the
     indexing tests.
 
     """
     def prepare(self):
         super().prepare()
         self.tools = None
 
 
 class TestMimetypeIndexerWithErrors(unittest.TestCase):
 
     @istest
     def test_index_fail_because_wrong_tool(self):
         try:
             TestMimetypeIndexerWrongStorage()
         except ValueError:
             pass
         else:
             self.fail('An error should be raised about wrong tool being used.')
 
 
 class TestMimetypeIndexerTest(unittest.TestCase):
     def setUp(self):
         self.indexer = TestMimetypeIndexer()
 
     @istest
     def test_index_no_update(self):
         # given
         sha1s = ['01c9379dfc33803963d07c1ccc748d3fe4c96bb50',
                  '688a5ef812c53907562fe379d4b3851e69c7cb15']
 
         # when
         self.indexer.run(sha1s, policy_update='ignore-dups')
 
         # then
         expected_results = [{
             'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb50',
             'indexer_configuration_id': 10,
             'mimetype': b'text/plain',
             'encoding': b'us-ascii',
         }, {
             'id': '688a5ef812c53907562fe379d4b3851e69c7cb15',
             'indexer_configuration_id': 10,
             'mimetype': b'text/plain',
             'encoding': b'us-ascii',
         }]
 
         self.assertFalse(self.indexer.storage.conflict_update)
-        self.assertEquals(expected_results, self.indexer.storage.state)
+        self.assertEqual(expected_results, self.indexer.storage.state)
 
     @istest
     def test_index_update(self):
         # given
         sha1s = ['01c9379dfc33803963d07c1ccc748d3fe4c96bb50',
                  '688a5ef812c53907562fe379d4b3851e69c7cb15']
 
         # when
         self.indexer.run(sha1s, policy_update='update-dups')
 
         # then
         expected_results = [{
             'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb50',
             'indexer_configuration_id': 10,
             'mimetype': b'text/plain',
             'encoding': b'us-ascii',
         }, {
             'id': '688a5ef812c53907562fe379d4b3851e69c7cb15',
             'indexer_configuration_id': 10,
             'mimetype': b'text/plain',
             'encoding': b'us-ascii',
         }]
 
         self.assertTrue(self.indexer.storage.conflict_update)
-        self.assertEquals(expected_results, self.indexer.storage.state)
+        self.assertEqual(expected_results, self.indexer.storage.state)
 
     @istest
     def test_index_one_unknown_sha1(self):
         # given
         sha1s = ['688a5ef812c53907562fe379d4b3851e69c7cb15',
                  '799a5ef812c53907562fe379d4b3851e69c7cb15',  # unknown
                  '800a5ef812c53907562fe379d4b3851e69c7cb15']  # unknown
 
         # when
         self.indexer.run(sha1s, policy_update='update-dups')
 
         # then
         expected_results = [{
             'id': '688a5ef812c53907562fe379d4b3851e69c7cb15',
             'indexer_configuration_id': 10,
             'mimetype': b'text/plain',
             'encoding': b'us-ascii',
         }]
 
         self.assertTrue(self.indexer.storage.conflict_update)
-        self.assertEquals(expected_results, self.indexer.storage.state)
+        self.assertEqual(expected_results, self.indexer.storage.state)
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
new file mode 100644
index 0000000..6f199b9
--- /dev/null
+++ b/swh/indexer/tests/test_utils.py
@@ -0,0 +1,72 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.objstorage.exc import ObjNotFoundError
+
+
+class MockStorageWrongConfiguration():
+    """Mock storage whose tool lookup never finds a configuration.
+
+    """
+    def indexer_configuration_get(self, tool):
+        return None
+
+
+class MockObjStorage():
+    """Mock objstorage with predefined contents.
+
+    """
+    def __init__(self):
+        self.data = {
+            '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
+            '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
+            '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
+            '02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
+            import unittest
+            import logging
+            from nose.tools import istest
+            from swh.indexer.mimetype import ContentMimetypeIndexer
+            from swh.indexer.tests.test_utils import MockObjStorage
+
+            class MockStorage():
+                def content_mimetype_add(self, mimetypes):
+                    self.state = mimetypes
+                    self.conflict_update = conflict_update
+
+                def indexer_configuration_get(self, tool):
+                    return {
+                        'id': 10,
+                    }
+            """,
+            '103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
+            #ifndef __AVL__
+            #define __AVL__
+
+            typedef struct _avl_tree avl_tree;
+
+            typedef struct _data_t {
+                int content;
+            } data_t;
+            """,
+            '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
+            (should 'pygments (recognize 'lisp 'easily))
+
+            """
+
+        }
+
+    def get(self, sha1):
+        """Return the raw content registered under sha1.
+
+        Raises:
+            ObjNotFoundError: if sha1 is not a key of the mock data.
+
+        """
+        raw_content = self.data.get(sha1)
+        # Compare against None: an empty (falsy) content is still a
+        # stored object and must not be reported as missing.
+        if raw_content is None:
+            raise ObjNotFoundError()
+        return raw_content