Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9348168
D210.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D210.diff
View Options
diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_language.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2015-2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import logging
+from nose.tools import istest
+from swh.indexer import language
+from swh.indexer.language import ContentLanguageIndexer
+from swh.indexer.tests.test_utils import MockObjStorage
+
+
+class MockStorage():
+ """Mock storage to simplify reading indexers' outputs.
+ """
+ def content_language_add(self, languages, conflict_update=None):
+ self.state = languages
+ self.conflict_update = conflict_update
+
+ def indexer_configuration_get(self, tool):
+ return {
+ 'id': 20,
+ }
+
+
+class TestLanguageIndexer(ContentLanguageIndexer):
+ """Specific language indexer whose configuration is enough to satisfy
+ the indexing tests.
+ """
+ def prepare(self):
+ self.config = {
+ 'destination_queue': None,
+ 'rescheduling_task': None,
+ 'tools': {
+ 'name': 'pygments',
+ 'version': '2.0.1+dfsg-1.1+deb8u1',
+ 'configuration': {
+ 'type': 'library',
+ 'debian-package': 'python3-pygments',
+ 'max_content_size': 10240,
+ },
+ }
+ }
+ self.storage = MockStorage()
+ self.log = logging.getLogger('swh.indexer')
+ self.objstorage = MockObjStorage()
+ self.task_destination = None
+ self.rescheduling_task = self.config['rescheduling_task']
+ self.tool_config = self.config['tools']['configuration']
+ self.max_content_size = self.tool_config['max_content_size']
+ self.tools = self.retrieve_tools_information()
+
+
+class Language(unittest.TestCase):
+ """
+ Tests the pygments tool for language detection.
+ """
+ def setUp(self):
+ self.maxDiff = None
+
+ @istest
+ def test_compute_language_none(self):
+ # given
+ self.content = ""
+ self.declared_language = {
+ 'lang': None
+ }
+ # when
+ result = language.compute_language(self.content)
+ # then
+ self.assertEqual(self.declared_language, result)
+
+ @istest
+ def test_index_content_language_python(self):
+ # given
+ # testing python
+ sha1s = ['02fb2c89e14f7fab46701478c83779c7beb7b069']
+ lang_indexer = TestLanguageIndexer()
+
+ # when
+ lang_indexer.run(sha1s, policy_update='ignore-dups')
+ results = lang_indexer.storage.state
+
+ expected_results = [{
+ 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069',
+ 'indexer_configuration_id': 20,
+ 'lang': 'python'
+ }]
+ # then
+ self.assertEqual(expected_results, results)
+
+ @istest
+ def test_index_content_language_c(self):
+ # given
+ # testing c
+ sha1s = ['103bc087db1d26afc3a0283f38663d081e9b01e6']
+ lang_indexer = TestLanguageIndexer()
+
+ # when
+ lang_indexer.run(sha1s, policy_update='ignore-dups')
+ results = lang_indexer.storage.state
+
+ expected_results = [{
+ 'id': '103bc087db1d26afc3a0283f38663d081e9b01e6',
+ 'indexer_configuration_id': 20,
+ 'lang': 'c'
+ }]
+
+ # then
+ self.assertEqual('c', results[0]['lang'])
+ self.assertEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -8,7 +8,8 @@
from nose.tools import istest
from swh.indexer.mimetype import ContentMimetypeIndexer
-from swh.objstorage.exc import ObjNotFoundError
+
+from swh.indexer.tests.test_utils import MockObjStorage
class MockStorage():
@@ -25,29 +26,6 @@
}
-class MockStorageWrongConfiguration():
- def indexer_configuration_get(self, tool):
- return None
-
-
-class MockObjStorage():
- """Mock objstorage with predefined contents.
-
- """
- def __init__(self):
- self.data = {
- '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
- '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
- '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
- }
-
- def get(self, sha1):
- raw_content = self.data.get(sha1)
- if not raw_content:
- raise ObjNotFoundError()
- return raw_content
-
-
class TestMimetypeIndexer(ContentMimetypeIndexer):
"""Specific mimetype whose configuration is enough to satisfy the
indexing tests.
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_utils.py
@@ -0,0 +1,62 @@
+
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.objstorage.exc import ObjNotFoundError
+
+
+class MockStorageWrongConfiguration():
+ def indexer_configuration_get(self, tool):
+ return None
+
+
+class MockObjStorage():
+ """Mock objstorage with predefined contents.
+
+ """
+ def __init__(self):
+ self.data = {
+ '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
+ '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
+ '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
+ '02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
+ import unittest
+ import logging
+ from nose.tools import istest
+ from swh.indexer.mimetype import ContentMimetypeIndexer
+ from swh.indexer.tests.test_utils import MockObjStorage
+
+ class MockStorage():
+ def content_mimetype_add(self, mimetypes):
+ self.state = mimetypes
+ self.conflict_update = conflict_update
+
+ def indexer_configuration_get(self, tool):
+ return {
+ 'id': 10,
+ }
+ """,
+ '103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
+ #ifndef __AVL__
+ #define __AVL__
+
+ typedef struct _avl_tree avl_tree;
+
+ typedef struct _data_t {
+ int content;
+ } data_t;
+ """,
+ '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
+ (should 'pygments (recognize 'lisp 'easily))
+
+ """
+
+ }
+
+ def get(self, sha1):
+ raw_content = self.data.get(sha1)
+ if not raw_content:
+ raise ObjNotFoundError()
+ return raw_content
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jul 3 2025, 6:15 PM (5 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214830
Attached To
D210: Added tests for language indexer (T722)
Event Timeline
Log In to Comment