diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_language.py
@@ -0,0 +1,114 @@
+# Copyright (C) 2015-2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import logging
+from nose.tools import istest
+from swh.indexer import language
+from swh.indexer.language import ContentLanguageIndexer
+from swh.indexer.tests.test_utils import MockObjStorage
+
+
+class MockStorage():
+    """Mock storage to simplify reading indexers' outputs.
+    """
+    def content_language_add(self, languages, conflict_update=None):
+        # Keep what the indexer sent so the tests can assert on it.
+        self.state = languages
+        self.conflict_update = conflict_update
+
+    def indexer_configuration_get(self, tool):
+        return {
+            'id': 20,
+        }
+
+
+class TestLanguageIndexer(ContentLanguageIndexer):
+    """Specific language indexer whose configuration is enough to
+       satisfy the indexing tests.
+    """
+    def prepare(self):
+        self.config = {
+            'destination_queue': None,
+            'rescheduling_task': None,
+            'tools': {
+                'name': 'pygments',
+                'version': '2.0.1+dfsg-1.1+deb8u1',
+                'configuration': {
+                    'type': 'library',
+                    'debian-package': 'python3-pygments',
+                    'max_content_size': 10240,
+                },
+            }
+        }
+        # Mocks stand in for the storage and objstorage backends.
+        self.storage = MockStorage()
+        self.log = logging.getLogger('swh.indexer')
+        self.objstorage = MockObjStorage()
+        self.task_destination = None
+        self.rescheduling_task = self.config['rescheduling_task']
+        self.tool_config = self.config['tools']['configuration']
+        self.max_content_size = self.tool_config['max_content_size']
+        self.tools = self.retrieve_tools_information()
+
+
+class Language(unittest.TestCase):
+    """
+    Tests pygments tool for language detection
+    """
+    def setUp(self):
+        self.maxDiff = None
+
+    @istest
+    def test_compute_language_none(self):
+        # given
+        self.content = ""
+        self.declared_language = {
+            'lang': None
+        }
+        # when
+        result = language.compute_language(self.content)
+        # then
+        self.assertEqual(self.declared_language, result)
+
+    @istest
+    def test_index_content_language_python(self):
+        # given
+        # testing python
+        sha1s = ['02fb2c89e14f7fab46701478c83779c7beb7b069']
+        lang_indexer = TestLanguageIndexer()
+
+        # when
+        lang_indexer.run(sha1s, policy_update='ignore-dups')
+        results = lang_indexer.storage.state
+
+        expected_results = [{
+            'id': '02fb2c89e14f7fab46701478c83779c7beb7b069',
+            'indexer_configuration_id': 20,
+            'lang': 'python'
+        }]
+        # then
+        self.assertEqual(expected_results, results)
+
+    @istest
+    def test_index_content_language_c(self):
+        # given
+        # testing c
+        sha1s = ['103bc087db1d26afc3a0283f38663d081e9b01e6']
+        lang_indexer = TestLanguageIndexer()
+
+        # when
+        lang_indexer.run(sha1s, policy_update='ignore-dups')
+        results = lang_indexer.storage.state
+
+        expected_results = [{
+            'id': '103bc087db1d26afc3a0283f38663d081e9b01e6',
+            'indexer_configuration_id': 20,
+            'lang': 'c'
+        }]
+
+        # then
+        self.assertEqual('c', results[0]['lang'])
+        self.assertEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -8,7 +8,8 @@
 from nose.tools import istest
 
 from swh.indexer.mimetype import ContentMimetypeIndexer
-from swh.objstorage.exc import ObjNotFoundError
+
+from swh.indexer.tests.test_utils import MockObjStorage
 
 
 class MockStorage():
@@ -25,29 +26,6 @@
         }
 
 
-class MockStorageWrongConfiguration():
-    def indexer_configuration_get(self, tool):
-        return None
-
-
-class MockObjStorage():
-    """Mock objstorage with predefined contents.
-
-    """
-    def __init__(self):
-        self.data = {
-            '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
-            '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
-            '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
-        }
-
-    def get(self, sha1):
-        raw_content = self.data.get(sha1)
-        if not raw_content:
-            raise ObjNotFoundError()
-        return raw_content
-
-
 class TestMimetypeIndexer(ContentMimetypeIndexer):
     """Specific mimetype whose configuration is enough to satisfy the
        indexing tests.
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_utils.py
@@ -0,0 +1,75 @@
+
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.objstorage.exc import ObjNotFoundError
+
+
+class MockStorageWrongConfiguration():
+    """Mock storage whose tool configuration lookup always yields None.
+    """
+    def indexer_configuration_get(self, tool):
+        return None
+
+
+class MockObjStorage():
+    """Mock objstorage with predefined contents.
+
+    Raw contents (plain text, python, c and lisp samples) are kept in
+    self.data, keyed by the identifier string passed to get().
+
+    """
+    def __init__(self):
+        self.data = {
+            '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
+            '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
+            '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
+            '02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
+            import unittest
+            import logging
+            from nose.tools import istest
+            from swh.indexer.mimetype import ContentMimetypeIndexer
+            from swh.indexer.tests.test_utils import MockObjStorage
+
+            class MockStorage():
+                def content_mimetype_add(self, mimetypes):
+                    self.state = mimetypes
+                    self.conflict_update = conflict_update
+
+                def indexer_configuration_get(self, tool):
+                    return {
+                        'id': 10,
+                    }
+            """,
+            '103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
+            #ifndef __AVL__
+            #define __AVL__
+
+            typedef struct _avl_tree avl_tree;
+
+            typedef struct _data_t {
+              int content;
+            } data_t;
+            """,
+            '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
+            (should 'pygments (recognize 'lisp 'easily))
+
+            """
+
+        }
+
+    def get(self, sha1):
+        """Return the raw content stored under sha1.
+
+        Raises:
+            ObjNotFoundError: if sha1 is not a known identifier.
+
+        """
+        raw_content = self.data.get(sha1)
+        # Compare against None: an empty content (b'') is falsy but is
+        # still a stored object and must not be reported as missing.
+        if raw_content is None:
+            raise ObjNotFoundError()
+        return raw_content