Page Menu | Home | Software Heritage

D210.diff
No One | Temporary

D210.diff

diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_language.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2015-2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import logging
+from nose.tools import istest
+from swh.indexer import language
+from swh.indexer.language import ContentLanguageIndexer
+from swh.indexer.tests.test_utils import MockObjStorage
+
+
+class MockStorage():
+ """Mock storage to simplify reading indexers' outputs.
+ """
+ def content_language_add(self, languages, conflict_update=None):
+ self.state = languages
+ self.conflict_update = conflict_update
+
+ def indexer_configuration_get(self, tool):
+ return {
+ 'id': 20,
+ }
+
+
+class TestLanguageIndexer(ContentLanguageIndexer):
+ """Specific language whose configuration is enough to satisfy the
+ indexing tests.
+ """
+ def prepare(self):
+ self.config = {
+ 'destination_queue': None,
+ 'rescheduling_task': None,
+ 'tools': {
+ 'name': 'pygments',
+ 'version': '2.0.1+dfsg-1.1+deb8u1',
+ 'configuration': {
+ 'type': 'library',
+ 'debian-package': 'python3-pygments',
+ 'max_content_size': 10240,
+ },
+ }
+ }
+ self.storage = MockStorage()
+ self.log = logging.getLogger('swh.indexer')
+ self.objstorage = MockObjStorage()
+ self.task_destination = None
+ self.rescheduling_task = self.config['rescheduling_task']
+ self.tool_config = self.config['tools']['configuration']
+ self.max_content_size = self.tool_config['max_content_size']
+ self.tools = self.retrieve_tools_information()
+
+
+class Language(unittest.TestCase):
+ """
+ Tests pygments tool for language detection
+ """
+ def setUp(self):
+ self.maxDiff = None
+
+ @istest
+ def test_compute_language_none(self):
+ # given
+ self.content = ""
+ self.declared_language = {
+ 'lang': None
+ }
+ # when
+ result = language.compute_language(self.content)
+ # then
+ self.assertEqual(self.declared_language, result)
+
+ @istest
+ def test_index_content_language_python(self):
+ # given
+ # testing python
+ sha1s = ['02fb2c89e14f7fab46701478c83779c7beb7b069']
+ lang_indexer = TestLanguageIndexer()
+
+ # when
+ lang_indexer.run(sha1s, policy_update='ignore-dups')
+ results = lang_indexer.storage.state
+
+ expected_results = [{
+ 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069',
+ 'indexer_configuration_id': 20,
+ 'lang': 'python'
+ }]
+ # then
+ self.assertEqual(expected_results, results)
+
+ @istest
+ def test_index_content_language_c(self):
+ # given
+ # testing c
+ sha1s = ['103bc087db1d26afc3a0283f38663d081e9b01e6']
+ lang_indexer = TestLanguageIndexer()
+
+ # when
+ lang_indexer.run(sha1s, policy_update='ignore-dups')
+ results = lang_indexer.storage.state
+
+ expected_results = [{
+ 'id': '103bc087db1d26afc3a0283f38663d081e9b01e6',
+ 'indexer_configuration_id': 20,
+ 'lang': 'c'
+ }]
+
+ # then
+ self.assertEqual('c', results[0]['lang'])
+ self.assertEqual(expected_results, results)
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -8,7 +8,8 @@
from nose.tools import istest
from swh.indexer.mimetype import ContentMimetypeIndexer
-from swh.objstorage.exc import ObjNotFoundError
+
+from swh.indexer.tests.test_utils import MockObjStorage
class MockStorage():
@@ -25,29 +26,6 @@
}
-class MockStorageWrongConfiguration():
- def indexer_configuration_get(self, tool):
- return None
-
-
-class MockObjStorage():
- """Mock objstorage with predefined contents.
-
- """
- def __init__(self):
- self.data = {
- '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
- '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
- '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
- }
-
- def get(self, sha1):
- raw_content = self.data.get(sha1)
- if not raw_content:
- raise ObjNotFoundError()
- return raw_content
-
-
class TestMimetypeIndexer(ContentMimetypeIndexer):
"""Specific mimetype whose configuration is enough to satisfy the
indexing tests.
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_utils.py
@@ -0,0 +1,62 @@
+
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.objstorage.exc import ObjNotFoundError
+
+
+class MockStorageWrongConfiguration():
+ def indexer_configuration_get(self, tool):
+ return None
+
+
+class MockObjStorage():
+ """Mock objstorage with predefined contents.
+
+ """
+ def __init__(self):
+ self.data = {
+ '01c9379dfc33803963d07c1ccc748d3fe4c96bb50': b'this is some text',
+ '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
+ '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
+ '02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
+ import unittest
+ import logging
+ from nose.tools import istest
+ from swh.indexer.mimetype import ContentMimetypeIndexer
+ from swh.indexer.tests.test_utils import MockObjStorage
+
+ class MockStorage():
+ def content_mimetype_add(self, mimetypes):
+ self.state = mimetypes
+ self.conflict_update = conflict_update
+
+ def indexer_configuration_get(self, tool):
+ return {
+ 'id': 10,
+ }
+ """,
+ '103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
+ #ifndef __AVL__
+ #define __AVL__
+
+ typedef struct _avl_tree avl_tree;
+
+ typedef struct _data_t {
+ int content;
+ } data_t;
+ """,
+ '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
+ (should 'pygments (recognize 'lisp 'easily))
+
+ """
+
+ }
+
+ def get(self, sha1):
+ raw_content = self.data.get(sha1)
+ if not raw_content:
+ raise ObjNotFoundError()
+ return raw_content

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 6:15 PM (5 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214830

Event Timeline