diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py index a5d6bfa..0f59050 100644 --- a/swh/indexer/tests/storage/test_converters.py +++ b/swh/indexer/tests/storage/test_converters.py @@ -1,198 +1,191 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest -from nose.tools import istest from nose.plugins.attrib import attr from swh.indexer.storage import converters @attr('!db') class TestConverters(unittest.TestCase): def setUp(self): self.maxDiff = None - @istest - def ctags_to_db(self): + def test_ctags_to_db(self): input_ctag = { 'id': b'some-id', 'indexer_configuration_id': 100, 'ctags': [ { 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', }, { 'name': 'main', 'kind': 'function', 'line': 12, 'lang': 'Yaml', }, ] } expected_ctags = [ { 'id': b'some-id', 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', 'indexer_configuration_id': 100, }, { 'id': b'some-id', 'name': 'main', 'kind': 'function', 'line': 12, 'lang': 'Yaml', 'indexer_configuration_id': 100, }] # when actual_ctags = list(converters.ctags_to_db(input_ctag)) # then self.assertEquals(actual_ctags, expected_ctags) - @istest - def db_to_ctags(self): + def test_db_to_ctags(self): input_ctags = { 'id': b'some-id', 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', 'tool_id': 200, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {} } expected_ctags = { 'id': b'some-id', 'name': 'some-name', 'kind': 'some-kind', 'line': 10, 'lang': 'Yaml', 'tool': { 'id': 200, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } # when actual_ctags = converters.db_to_ctags(input_ctags) # then self.assertEquals(actual_ctags, expected_ctags) - @istest - def db_to_mimetype(self): + def test_db_to_mimetype(self): input_mimetype = { 'id': b'some-id', 'tool_id': 10, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {}, 'encoding': b'ascii', 'mimetype': b'text/plain', } expected_mimetype = { 'id': b'some-id', 'encoding': b'ascii', 'mimetype': b'text/plain', 'tool': { 'id': 10, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } actual_mimetype = converters.db_to_mimetype(input_mimetype) self.assertEquals(actual_mimetype, expected_mimetype) - @istest - def db_to_language(self): + def test_db_to_language(self): input_language = { 'id': b'some-id', 'tool_id': 20, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {}, 'lang': b'css', } expected_language = { 'id': b'some-id', 'lang': b'css', 'tool': { 'id': 20, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } actual_language = converters.db_to_language(input_language) self.assertEquals(actual_language, expected_language) - @istest - def db_to_fossology_license(self): + def test_db_to_fossology_license(self): input_license = { 'id': b'some-id', 'tool_id': 20, 'tool_name': 'nomossa', 'tool_version': '5.22', 'tool_configuration': {}, 'licenses': ['GPL2.0'], } expected_license = { 'licenses': ['GPL2.0'], 'tool': { 'id': 20, 'name': 'nomossa', 'version': '5.22', 'configuration': {}, } } actual_license = converters.db_to_fossology_license(input_license) self.assertEquals(actual_license, expected_license) - @istest - def db_to_metadata(self): + def test_db_to_metadata(self): input_metadata = { 'id': b'some-id', 'tool_id': 20, 'tool_name': 'some-toolname', 'tool_version': 'some-toolversion', 'tool_configuration': {}, 'translated_metadata': b'translated_metadata', } expected_metadata = { 'id': b'some-id', 'translated_metadata': b'translated_metadata', 'tool': { 'id': 20, 'name': 'some-toolname', 'version': 'some-toolversion', 'configuration': {}, } } actual_metadata = converters.db_to_metadata(input_metadata) self.assertEquals(actual_metadata, expected_metadata) diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py index 470c470..d284afc 100644 --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -1,1484 +1,1451 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pathlib import unittest -from nose.tools import istest from nose.plugins.attrib import attr from swh.model.hashutil import hash_to_bytes from swh.indexer.storage import get_indexer_storage from swh.core.tests.db_testing import DbTestFixture from swh.indexer.tests import DATA_DIR class StorageTestFixture: """Mix this in a test subject class to get Storage testing support. This fixture requires to come before DbTestFixture in the inheritance list as it uses its methods to setup its own internal database. Usage example: class TestStorage(StorageTestFixture, DbTestFixture): ... """ TEST_STORAGE_DB_NAME = 'softwareheritage-test-indexer' @classmethod def setUpClass(cls): if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'): raise RuntimeError("StorageTestFixture needs to be followed by " "DbTestFixture in the inheritance list.") test_data_dir = pathlib.Path(DATA_DIR).absolute() test_db_dump = test_data_dir / 'dumps/swh-indexer.sql' cls.add_db(cls.TEST_STORAGE_DB_NAME, str(test_db_dump), 'psql') super().setUpClass() def setUp(self): super().setUp() self.storage_config = { 'cls': 'local', 'args': { 'db': 'dbname=%s' % self.TEST_STORAGE_DB_NAME, }, } self.storage = get_indexer_storage(**self.storage_config) def tearDown(self): self.storage = None super().tearDown() def reset_storage_tables(self): excluded = {'indexer_configuration'} self.reset_db_tables(self.TEST_STORAGE_DB_NAME, excluded=excluded) db = self.test_db[self.TEST_STORAGE_DB_NAME] db.conn.commit() @attr('db') class BaseTestStorage(StorageTestFixture, DbTestFixture): def setUp(self): super().setUp() self.sha1_1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') self.sha1_2 = hash_to_bytes('61c2b3a30496d329e21af70dd2d7e097046d07b7') self.revision_id_1 = hash_to_bytes( '7026b7c1a2af56521e951c01ed20f255fa054238') self.revision_id_2 = hash_to_bytes( '7026b7c1a2af56521e9587659012345678904321') cur = self.test_db[self.TEST_STORAGE_DB_NAME].cursor tools = {} cur.execute(''' select tool_name, id, tool_version, tool_configuration from indexer_configuration order by id''') for row in cur.fetchall(): key = row[0] while key in tools: key = '_' + key tools[key] = { 'id': row[1], 'name': row[0], 'version': row[2], 'configuration': row[3] } self.tools = tools def tearDown(self): self.reset_storage_tables() super().tearDown() @attr('db') class CommonTestStorage(BaseTestStorage): """Base class for Indexer Storage testing. """ - @istest - def check_config(self): + def test_check_config(self): self.assertTrue(self.storage.check_config(check_write=True)) self.assertTrue(self.storage.check_config(check_write=False)) - @istest - def content_mimetype_missing(self): + def test_content_mimetype_missing(self): # given tool_id = self.tools['file']['id'] mimetypes = [ { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }] # when actual_missing = self.storage.content_mimetype_missing(mimetypes) # then self.assertEqual(list(actual_missing), [ self.sha1_1, self.sha1_2, ]) # given self.storage.content_mimetype_add([{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, }]) # when actual_missing = self.storage.content_mimetype_missing(mimetypes) # then self.assertEqual(list(actual_missing), [self.sha1_1]) - @istest - def content_mimetype_add__drop_duplicate(self): + def test_content_mimetype_add__drop_duplicate(self): # given tool_id = self.tools['file']['id'] mimetype_v1 = { 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, } # given self.storage.content_mimetype_add([mimetype_v1]) # when actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) # then expected_mimetypes_v1 = [{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'tool': self.tools['file'], }] self.assertEqual(actual_mimetypes, expected_mimetypes_v1) # given mimetype_v2 = mimetype_v1.copy() mimetype_v2.update({ 'mimetype': b'text/html', 'encoding': b'us-ascii', }) self.storage.content_mimetype_add([mimetype_v2]) actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) # mimetype did not change as the v2 was dropped. self.assertEqual(actual_mimetypes, expected_mimetypes_v1) - @istest - def content_mimetype_add__update_in_place_duplicate(self): + def test_content_mimetype_add__update_in_place_duplicate(self): # given tool_id = self.tools['file']['id'] mimetype_v1 = { 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, } # given self.storage.content_mimetype_add([mimetype_v1]) # when actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) expected_mimetypes_v1 = [{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'tool': self.tools['file'], }] # then self.assertEqual(actual_mimetypes, expected_mimetypes_v1) # given mimetype_v2 = mimetype_v1.copy() mimetype_v2.update({ 'mimetype': b'text/html', 'encoding': b'us-ascii', }) self.storage.content_mimetype_add([mimetype_v2], conflict_update=True) actual_mimetypes = list(self.storage.content_mimetype_get( [self.sha1_2])) expected_mimetypes_v2 = [{ 'id': self.sha1_2, 'mimetype': b'text/html', 'encoding': b'us-ascii', 'tool': { 'id': 2, 'name': 'file', 'version': '5.22', 'configuration': {'command_line': 'file --mime '} } }] # mimetype did change as the v2 was used to overwrite v1 self.assertEqual(actual_mimetypes, expected_mimetypes_v2) - @istest - def content_mimetype_get(self): + def test_content_mimetype_get(self): # given tool_id = self.tools['file']['id'] mimetypes = [self.sha1_2, self.sha1_1] mimetype1 = { 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'indexer_configuration_id': tool_id, } # when self.storage.content_mimetype_add([mimetype1]) # then actual_mimetypes = list(self.storage.content_mimetype_get(mimetypes)) # then expected_mimetypes = [{ 'id': self.sha1_2, 'mimetype': b'text/plain', 'encoding': b'utf-8', 'tool': self.tools['file'] }] self.assertEqual(actual_mimetypes, expected_mimetypes) - @istest - def content_language_missing(self): + def test_content_language_missing(self): # given tool_id = self.tools['pygments']['id'] languages = [ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, } ] # when actual_missing = list(self.storage.content_language_missing(languages)) # then self.assertEqual(list(actual_missing), [ self.sha1_2, self.sha1_1, ]) # given self.storage.content_language_add([{ 'id': self.sha1_2, 'lang': 'haskell', 'indexer_configuration_id': tool_id, }]) # when actual_missing = list(self.storage.content_language_missing(languages)) # then self.assertEqual(actual_missing, [self.sha1_1]) - @istest - def content_language_get(self): + def test_content_language_get(self): # given tool_id = self.tools['pygments']['id'] language1 = { 'id': self.sha1_2, 'lang': 'common-lisp', 'indexer_configuration_id': tool_id, } # when self.storage.content_language_add([language1]) # then actual_languages = list(self.storage.content_language_get( [self.sha1_2, self.sha1_1])) # then expected_languages = [{ 'id': self.sha1_2, 'lang': 'common-lisp', 'tool': self.tools['pygments'] }] self.assertEqual(actual_languages, expected_languages) - @istest - def content_language_add__drop_duplicate(self): + def test_content_language_add__drop_duplicate(self): # given tool_id = self.tools['pygments']['id'] language_v1 = { 'id': self.sha1_2, 'lang': 'emacslisp', 'indexer_configuration_id': tool_id, } # given self.storage.content_language_add([language_v1]) # when actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # then expected_languages_v1 = [{ 'id': self.sha1_2, 'lang': 'emacslisp', 'tool': self.tools['pygments'] }] self.assertEqual(actual_languages, expected_languages_v1) # given language_v2 = language_v1.copy() language_v2.update({ 'lang': 'common-lisp', }) self.storage.content_language_add([language_v2]) actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # language did not change as the v2 was dropped. self.assertEqual(actual_languages, expected_languages_v1) - @istest - def content_language_add__update_in_place_duplicate(self): + def test_content_language_add__update_in_place_duplicate(self): # given tool_id = self.tools['pygments']['id'] language_v1 = { 'id': self.sha1_2, 'lang': 'common-lisp', 'indexer_configuration_id': tool_id, } # given self.storage.content_language_add([language_v1]) # when actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # then expected_languages_v1 = [{ 'id': self.sha1_2, 'lang': 'common-lisp', 'tool': self.tools['pygments'] }] self.assertEqual(actual_languages, expected_languages_v1) # given language_v2 = language_v1.copy() language_v2.update({ 'lang': 'emacslisp', }) self.storage.content_language_add([language_v2], conflict_update=True) actual_languages = list(self.storage.content_language_get( [self.sha1_2])) # language did not change as the v2 was dropped. expected_languages_v2 = [{ 'id': self.sha1_2, 'lang': 'emacslisp', 'tool': self.tools['pygments'] }] # language did change as the v2 was used to overwrite v1 self.assertEqual(actual_languages, expected_languages_v2) - @istest - def content_ctags_missing(self): + def test_content_ctags_missing(self): # given tool_id = self.tools['universal-ctags']['id'] ctags = [ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, } ] # when actual_missing = self.storage.content_ctags_missing(ctags) # then self.assertEqual(list(actual_missing), [ self.sha1_2, self.sha1_1 ]) # given self.storage.content_ctags_add([ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 119, 'lang': 'OCaml', }] }, ]) # when actual_missing = self.storage.content_ctags_missing(ctags) # then self.assertEqual(list(actual_missing), [self.sha1_1]) - @istest - def content_ctags_get(self): + def test_content_ctags_get(self): # given tool_id = self.tools['universal-ctags']['id'] ctags = [self.sha1_2, self.sha1_1] ctag1 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Python', }, { 'name': 'main', 'kind': 'function', 'line': 119, 'lang': 'Python', }] } # when self.storage.content_ctags_add([ctag1]) # then actual_ctags = list(self.storage.content_ctags_get(ctags)) # then expected_ctags = [ { 'id': self.sha1_2, 'tool': self.tools['universal-ctags'], 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Python', }, { 'id': self.sha1_2, 'tool': self.tools['universal-ctags'], 'name': 'main', 'kind': 'function', 'line': 119, 'lang': 'Python', } ] self.assertEqual(actual_ctags, expected_ctags) - @istest - def content_ctags_search(self): + def test_content_ctags_search(self): # 1. given tool = self.tools['universal-ctags'] tool_id = tool['id'] ctag1 = { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', }, { 'name': 'counter', 'kind': 'variable', 'line': 119, 'lang': 'Python', }, ] } ctag2 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [ { 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', }, ] } self.storage.content_ctags_add([ctag1, ctag2]) # 1. when actual_ctags = list(self.storage.content_ctags_search('hello', limit=1)) # 1. then self.assertEqual(actual_ctags, [ { 'id': ctag1['id'], 'tool': tool, 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', } ]) # 2. when actual_ctags = list(self.storage.content_ctags_search( 'hello', limit=1, last_sha1=ctag1['id'])) # 2. then self.assertEqual(actual_ctags, [ { 'id': ctag2['id'], 'tool': tool, 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', } ]) # 3. when actual_ctags = list(self.storage.content_ctags_search('hello')) # 3. then self.assertEqual(actual_ctags, [ { 'id': ctag1['id'], 'tool': tool, 'name': 'hello', 'kind': 'function', 'line': 133, 'lang': 'Python', }, { 'id': ctag2['id'], 'tool': tool, 'name': 'hello', 'kind': 'variable', 'line': 100, 'lang': 'C', }, ]) # 4. when actual_ctags = list(self.storage.content_ctags_search('counter')) # then self.assertEqual(actual_ctags, [{ 'id': ctag1['id'], 'tool': tool, 'name': 'counter', 'kind': 'variable', 'line': 119, 'lang': 'Python', }]) - @istest - def content_ctags_search_no_result(self): + def test_content_ctags_search_no_result(self): actual_ctags = list(self.storage.content_ctags_search('counter')) self.assertEquals(actual_ctags, []) - @istest - def content_ctags_add__add_new_ctags_added(self): + def test_content_ctags_add__add_new_ctags_added(self): # given tool = self.tools['universal-ctags'] tool_id = tool['id'] ctag_v1 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }] } # given self.storage.content_ctags_add([ctag_v1]) self.storage.content_ctags_add([ctag_v1]) # conflict does nothing # when actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) # then expected_ctags = [{ 'id': self.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }] self.assertEqual(actual_ctags, expected_ctags) # given ctag_v2 = ctag_v1.copy() ctag_v2.update({ 'ctags': [ { 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', } ] }) self.storage.content_ctags_add([ctag_v2]) expected_ctags = [ { 'id': self.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }, { 'id': self.sha1_2, 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', 'tool': tool, } ] actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) self.assertEqual(actual_ctags, expected_ctags) - @istest - def content_ctags_add__update_in_place(self): + def test_content_ctags_add__update_in_place(self): # given tool = self.tools['universal-ctags'] tool_id = tool['id'] ctag_v1 = { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, 'ctags': [{ 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }] } # given self.storage.content_ctags_add([ctag_v1]) # when actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) # then expected_ctags = [ { 'id': self.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool } ] self.assertEqual(actual_ctags, expected_ctags) # given ctag_v2 = ctag_v1.copy() ctag_v2.update({ 'ctags': [ { 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', }, { 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', } ] }) self.storage.content_ctags_add([ctag_v2], conflict_update=True) actual_ctags = list(self.storage.content_ctags_get( [self.sha1_2])) # ctag did change as the v2 was used to overwrite v1 expected_ctags = [ { 'id': self.sha1_2, 'name': 'done', 'kind': 'variable', 'line': 100, 'lang': 'Scheme', 'tool': tool, }, { 'id': self.sha1_2, 'name': 'defn', 'kind': 'function', 'line': 120, 'lang': 'Scheme', 'tool': tool, } ] self.assertEqual(actual_ctags, expected_ctags) - @istest - def content_fossology_license_get(self): + def test_content_fossology_license_get(self): # given tool = self.tools['nomos'] tool_id = tool['id'] license1 = { 'id': self.sha1_1, 'licenses': ['GPL-2.0+'], 'indexer_configuration_id': tool_id, } # when self.storage.content_fossology_license_add([license1]) # then actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_2, self.sha1_1])) expected_license = { self.sha1_1: [{ 'licenses': ['GPL-2.0+'], 'tool': tool, }] } # then self.assertEqual(actual_licenses, [expected_license]) - @istest - def content_fossology_license_add__new_license_added(self): + def test_content_fossology_license_add__new_license_added(self): # given tool = self.tools['nomos'] tool_id = tool['id'] license_v1 = { 'id': self.sha1_1, 'licenses': ['Apache-2.0'], 'indexer_configuration_id': tool_id, } # given self.storage.content_fossology_license_add([license_v1]) # conflict does nothing self.storage.content_fossology_license_add([license_v1]) # when actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) # then expected_license = { self.sha1_1: [{ 'licenses': ['Apache-2.0'], 'tool': tool, }] } self.assertEqual(actual_licenses, [expected_license]) # given license_v2 = license_v1.copy() license_v2.update({ 'licenses': ['BSD-2-Clause'], }) self.storage.content_fossology_license_add([license_v2]) actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) expected_license = { self.sha1_1: [{ 'licenses': ['Apache-2.0', 'BSD-2-Clause'], 'tool': tool }] } # license did not change as the v2 was dropped. self.assertEqual(actual_licenses, [expected_license]) - @istest - def content_fossology_license_add__update_in_place_duplicate(self): + def test_content_fossology_license_add__update_in_place_duplicate(self): # given tool = self.tools['nomos'] tool_id = tool['id'] license_v1 = { 'id': self.sha1_1, 'licenses': ['CECILL'], 'indexer_configuration_id': tool_id, } # given self.storage.content_fossology_license_add([license_v1]) # conflict does nothing self.storage.content_fossology_license_add([license_v1]) # when actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) # then expected_license = { self.sha1_1: [{ 'licenses': ['CECILL'], 'tool': tool, }] } self.assertEqual(actual_licenses, [expected_license]) # given license_v2 = license_v1.copy() license_v2.update({ 'licenses': ['CECILL-2.0'] }) self.storage.content_fossology_license_add([license_v2], conflict_update=True) actual_licenses = list(self.storage.content_fossology_license_get( [self.sha1_1])) # license did change as the v2 was used to overwrite v1 expected_license = { self.sha1_1: [{ 'licenses': ['CECILL-2.0'], 'tool': tool, }] } self.assertEqual(actual_licenses, [expected_license]) - @istest - def content_metadata_missing(self): + def test_content_metadata_missing(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata = [ { 'id': self.sha1_2, 'indexer_configuration_id': tool_id, }, { 'id': self.sha1_1, 'indexer_configuration_id': tool_id, } ] # when actual_missing = list(self.storage.content_metadata_missing(metadata)) # then self.assertEqual(list(actual_missing), [ self.sha1_2, self.sha1_1, ]) # given self.storage.content_metadata_add([{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id }]) # when actual_missing = list(self.storage.content_metadata_missing(metadata)) # then self.assertEqual(actual_missing, [self.sha1_1]) - @istest - def content_metadata_get(self): + def test_content_metadata_get(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata1 = { 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id, } # when self.storage.content_metadata_add([metadata1]) # then actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2, self.sha1_1])) expected_metadata = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] self.assertEqual(actual_metadata, expected_metadata) - @istest - def content_metadata_add_drop_duplicate(self): + def test_content_metadata_add_drop_duplicate(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata_v1 = { 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id, } # given self.storage.content_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) expected_metadata_v1 = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'other': {}, 'name': 'test_drop_duplicated_metadata', 'version': '0.0.1' }, }) self.storage.content_metadata_add([metadata_v2]) # then actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) # metadata did not change as the v2 was dropped. self.assertEqual(actual_metadata, expected_metadata_v1) - @istest - def content_metadata_add_update_in_place_duplicate(self): + def test_content_metadata_add_update_in_place_duplicate(self): # given tool_id = self.tools['swh-metadata-translator']['id'] metadata_v1 = { 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'indexer_configuration_id': tool_id, } # given self.storage.content_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) # then expected_metadata_v1 = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'other': {}, 'name': 'test_update_duplicated_metadata', 'version': '0.0.1' }, }) self.storage.content_metadata_add([metadata_v2], conflict_update=True) actual_metadata = list(self.storage.content_metadata_get( [self.sha1_2])) # language did not change as the v2 was dropped. expected_metadata_v2 = [{ 'id': self.sha1_2, 'translated_metadata': { 'other': {}, 'name': 'test_update_duplicated_metadata', 'version': '0.0.1' }, 'tool': self.tools['swh-metadata-translator'] }] # metadata did change as the v2 was used to overwrite v1 self.assertEqual(actual_metadata, expected_metadata_v2) - @istest - def revision_metadata_missing(self): + def test_revision_metadata_missing(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata = [ { 'id': self.revision_id_1, 'indexer_configuration_id': tool_id, }, { 'id': self.revision_id_2, 'indexer_configuration_id': tool_id, } ] # when actual_missing = list(self.storage.revision_metadata_missing( metadata)) # then self.assertEqual(list(actual_missing), [ self.revision_id_1, self.revision_id_2, ]) # given self.storage.revision_metadata_add([{ 'id': self.revision_id_1, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id }]) # when actual_missing = list(self.storage.revision_metadata_missing( metadata)) # then self.assertEqual(actual_missing, [self.revision_id_2]) - @istest - def revision_metadata_get(self): + def test_revision_metadata_get(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata_rev = { 'id': self.revision_id_2, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id } # when self.storage.revision_metadata_add([metadata_rev]) # then actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_2, self.revision_id_1])) expected_metadata = [{ 'id': self.revision_id_2, 'translated_metadata': metadata_rev['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] self.assertEqual(actual_metadata, expected_metadata) - @istest - def revision_metadata_add_drop_duplicate(self): + def test_revision_metadata_add_drop_duplicate(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata_v1 = { 'id': self.revision_id_1, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id, } # given self.storage.revision_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_1])) expected_metadata_v1 = [{ 'id': self.revision_id_1, 'translated_metadata': metadata_v1['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'name': 'test_metadata', 'author': 'MG', }, }) self.storage.revision_metadata_add([metadata_v2]) # then actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_1])) # metadata did not change as the v2 was dropped. self.assertEqual(actual_metadata, expected_metadata_v1) - @istest - def revision_metadata_add_update_in_place_duplicate(self): + def test_revision_metadata_add_update_in_place_duplicate(self): # given tool_id = self.tools['swh-metadata-detector']['id'] metadata_v1 = { 'id': self.revision_id_2, 'translated_metadata': { 'developmentStatus': None, 'version': None, 'operatingSystem': None, 'description': None, 'keywords': None, 'issueTracker': None, 'name': None, 'author': None, 'relatedLink': None, 'url': None, 'type': None, 'license': None, 'maintainer': None, 'email': None, 'softwareRequirements': None, 'identifier': None }, 'indexer_configuration_id': tool_id, } # given self.storage.revision_metadata_add([metadata_v1]) # when actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_2])) # then expected_metadata_v1 = [{ 'id': self.revision_id_2, 'translated_metadata': metadata_v1['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] self.assertEqual(actual_metadata, expected_metadata_v1) # given metadata_v2 = metadata_v1.copy() metadata_v2.update({ 'translated_metadata': { 'name': 'test_update_duplicated_metadata', 'author': 'MG' }, }) self.storage.revision_metadata_add([metadata_v2], conflict_update=True) actual_metadata = list(self.storage.revision_metadata_get( [self.revision_id_2])) # language did not change as the v2 was dropped. expected_metadata_v2 = [{ 'id': self.revision_id_2, 'translated_metadata': metadata_v2['translated_metadata'], 'tool': self.tools['swh-metadata-detector'] }] # metadata did change as the v2 was used to overwrite v1 self.assertEqual(actual_metadata, expected_metadata_v2) - @istest - def indexer_configuration_add(self): + def test_indexer_configuration_add(self): tool = { 'tool_name': 'some-unknown-tool', 'tool_version': 'some-version', 'tool_configuration': {"debian-package": "some-package"}, } actual_tool = self.storage.indexer_configuration_get(tool) self.assertIsNone(actual_tool) # does not exist # add it actual_tools = list(self.storage.indexer_configuration_add([tool])) self.assertEquals(len(actual_tools), 1) actual_tool = actual_tools[0] self.assertIsNotNone(actual_tool) # now it exists new_id = actual_tool.pop('id') self.assertEquals(actual_tool, tool) actual_tools2 = list(self.storage.indexer_configuration_add([tool])) actual_tool2 = actual_tools2[0] self.assertIsNotNone(actual_tool2) # now it exists new_id2 = actual_tool2.pop('id') self.assertEqual(new_id, new_id2) self.assertEqual(actual_tool, actual_tool2) - @istest - def indexer_configuration_add_multiple(self): + def test_indexer_configuration_add_multiple(self): tool = { 'tool_name': 'some-unknown-tool', 'tool_version': 'some-version', 'tool_configuration': {"debian-package": "some-package"}, } actual_tools = list(self.storage.indexer_configuration_add([tool])) self.assertEqual(len(actual_tools), 1) new_tools = [tool, { 'tool_name': 'yet-another-tool', 'tool_version': 'version', 'tool_configuration': {}, }] actual_tools = list(self.storage.indexer_configuration_add(new_tools)) self.assertEqual(len(actual_tools), 2) # order not guaranteed, so we iterate over results to check for tool in actual_tools: _id = tool.pop('id') self.assertIsNotNone(_id) self.assertIn(tool, new_tools) - @istest - def indexer_configuration_get_missing(self): + def test_indexer_configuration_get_missing(self): tool = { 'tool_name': 'unknown-tool', 'tool_version': '3.1.0rc2-31-ga2cbb8c', 'tool_configuration': {"command_line": "nomossa "}, } actual_tool = self.storage.indexer_configuration_get(tool) self.assertIsNone(actual_tool) - @istest - def indexer_configuration_get(self): + def test_indexer_configuration_get(self): tool = { 'tool_name': 'nomos', 'tool_version': '3.1.0rc2-31-ga2cbb8c', 'tool_configuration': {"command_line": "nomossa "}, } actual_tool = self.storage.indexer_configuration_get(tool) expected_tool = tool.copy() expected_tool['id'] = 1 self.assertEqual(expected_tool, actual_tool) - @istest - def indexer_configuration_metadata_get_missing_context(self): + def test_indexer_configuration_metadata_get_missing_context(self): tool = { 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': {"context": "unknown-context"}, } actual_tool = self.storage.indexer_configuration_get(tool) self.assertIsNone(actual_tool) - @istest - def indexer_configuration_metadata_get(self): + def test_indexer_configuration_metadata_get(self): tool = { 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': {"type": "local", "context": "npm"}, } actual_tool = self.storage.indexer_configuration_get(tool) expected_tool = tool.copy() expected_tool['id'] = actual_tool['id'] self.assertEqual(expected_tool, actual_tool) class IndexerTestStorage(CommonTestStorage, unittest.TestCase): """Running the tests locally. For the client api tests (remote storage), see `class`:swh.indexer.storage.test_api_client:TestRemoteStorage class. """ pass diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py index 3990f77..0c50636 100644 --- a/swh/indexer/tests/test_language.py +++ b/swh/indexer/tests/test_language.py @@ -1,113 +1,109 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging -from nose.tools import istest from swh.indexer import language from swh.indexer.language import ContentLanguageIndexer from swh.indexer.tests.test_utils import MockObjStorage class _MockIndexerStorage(): """Mock storage to simplify reading indexers' outputs. """ def content_language_add(self, languages, conflict_update=None): self.state = languages self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 20, }] class TestLanguageIndexer(ContentLanguageIndexer): """Specific language whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'destination_task': None, 'rescheduling_task': None, 'tools': { 'name': 'pygments', 'version': '2.0.1+dfsg-1.1+deb8u1', 'configuration': { 'type': 'library', 'debian-package': 'python3-pygments', 'max_content_size': 10240, }, } } self.idx_storage = _MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.destination_task = None self.rescheduling_task = self.config['rescheduling_task'] self.tool_config = self.config['tools']['configuration'] self.max_content_size = self.tool_config['max_content_size'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class Language(unittest.TestCase): """ Tests pygments tool for language detection """ def setUp(self): self.maxDiff = None - @istest def test_compute_language_none(self): # given self.content = "" self.declared_language = { 'lang': None } # when result = language.compute_language(self.content) # then self.assertEqual(self.declared_language, result) - @istest def test_index_content_language_python(self): # given # testing python sha1s = ['02fb2c89e14f7fab46701478c83779c7beb7b069'] lang_indexer = TestLanguageIndexer() # when lang_indexer.run(sha1s, policy_update='ignore-dups') results = lang_indexer.idx_storage.state expected_results = [{ 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069', 'indexer_configuration_id': 20, 'lang': 'python' }] # then self.assertEqual(expected_results, results) - @istest def test_index_content_language_c(self): # given # testing c sha1s = ['103bc087db1d26afc3a0283f38663d081e9b01e6'] lang_indexer = TestLanguageIndexer() # when lang_indexer.run(sha1s, policy_update='ignore-dups') results = lang_indexer.idx_storage.state expected_results = [{ 'id': '103bc087db1d26afc3a0283f38663d081e9b01e6', 'indexer_configuration_id': 20, 'lang': 'c' }] # then self.assertEqual('c', results[0]['lang']) self.assertEqual(expected_results, results) diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py index d32e235..32b1803 100644 --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -1,366 +1,359 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging -from nose.tools import istest from swh.indexer.metadata_dictionary import compute_metadata from swh.indexer.metadata_detector import detect_metadata from swh.indexer.metadata_detector import extract_minimal_metadata_dict from swh.indexer.metadata import ContentMetadataIndexer from swh.indexer.metadata import RevisionMetadataIndexer from swh.indexer.tests.test_utils import MockObjStorage, MockStorage from swh.indexer.tests.test_utils import MockIndexerStorage class TestContentMetadataIndexer(ContentMetadataIndexer): """Specific Metadata whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config.update({ 'rescheduling_task': None, }) self.idx_storage = MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.destination_task = None self.rescheduling_task = self.config['rescheduling_task'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] self.results = [] class TestRevisionMetadataIndexer(RevisionMetadataIndexer): """Specific indexer whose configuration is enough to satisfy the indexing tests. """ ContentMetadataIndexer = TestContentMetadataIndexer def prepare(self): self.config = { 'rescheduling_task': None, 'storage': { 'cls': 'remote', 'args': { 'url': 'http://localhost:9999', } }, 'tools': { 'name': 'swh-metadata-detector', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': 'npm' } } } self.storage = MockStorage() self.idx_storage = MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.destination_task = None self.rescheduling_task = self.config['rescheduling_task'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] self.results = [] class Metadata(unittest.TestCase): """ Tests metadata_mock_tool tool for Metadata detection """ def setUp(self): """ shows the entire diff in the results """ self.maxDiff = None self.content_tool = { 'name': 'swh-metadata-translator', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': 'npm' } } - @istest def test_compute_metadata_none(self): """ testing content empty content is empty should return None """ # given content = b"" context = "npm" # None if no metadata was found or an error occurred declared_metadata = None # when result = compute_metadata(context, content) # then self.assertEqual(declared_metadata, result) - @istest def test_compute_metadata_npm(self): """ testing only computation of metadata with hard_mapping_npm """ # given content = b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """ declared_metadata = { 'name': 'test_metadata', 'version': '0.0.1', 'description': 'Simple package.json test for indexer', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'other': {} } # when result = compute_metadata("npm", content) # then self.assertEqual(declared_metadata, result) - @istest def test_extract_minimal_metadata_dict(self): """ Test the creation of a coherent minimal metadata set """ # given metadata_list = [{ 'name': 'test_1', 'version': '0.0.1', 'description': 'Simple package.json test for indexer', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'other': {} }, { 'name': 'test_0_1', 'version': '0.0.1', 'description': 'Simple package.json test for indexer', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'other': {} }, { 'name': 'test_metadata', 'version': '0.0.1', 'author': 'moranegg', 'other': {} }] # when results = extract_minimal_metadata_dict(metadata_list) # then expected_results = { "developmentStatus": None, "version": ['0.0.1'], "operatingSystem": None, "description": ['Simple package.json test for indexer'], "keywords": None, "issueTracker": None, "name": ['test_1', 'test_0_1', 'test_metadata'], "author": ['moranegg'], "relatedLink": None, "url": None, "license": None, "maintainer": None, "email": None, "softwareRequirements": None, "identifier": None, "codeRepository": [{ 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }] } self.assertEqual(expected_results, results) - @istest def test_index_content_metadata_npm(self): """ testing NPM with package.json - one sha1 uses a file that can't be translated to metadata and should return None in the translated metadata """ # given sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5', 'd4c647f0fc257591cc9ba1722484229780d1c607', '02fb2c89e14f7fab46701478c83779c7beb7b069'] # this metadata indexer computes only metadata for package.json # in npm context with a hard mapping metadata_indexer = TestContentMetadataIndexer( tool=self.content_tool, config={}) # when metadata_indexer.run(sha1s, policy_update='ignore-dups') results = metadata_indexer.idx_storage.state expected_results = [{ 'indexer_configuration_id': 30, 'translated_metadata': { 'other': {}, 'codeRepository': { 'type': 'git', 'url': 'https://github.com/moranegg/metadata_test' }, 'description': 'Simple package.json test for indexer', 'name': 'test_metadata', 'version': '0.0.1' }, 'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' }, { 'indexer_configuration_id': 30, 'translated_metadata': { 'softwareRequirements': { 'JSONStream': '~1.3.1', 'abbrev': '~1.1.0', 'ansi-regex': '~2.1.1', 'ansicolors': '~0.3.2', 'ansistyles': '~0.1.3' }, 'issueTracker': { 'url': 'https://github.com/npm/npm/issues' }, 'author': 'Isaac Z. Schlueter (http://blog.izs.me)', 'codeRepository': { 'type': 'git', 'url': 'https://github.com/npm/npm' }, 'description': 'a package manager for JavaScript', 'softwareSuggestions': { 'tacks': '~1.2.6', 'tap': '~10.3.2' }, 'license': 'Artistic-2.0', 'version': '5.0.3', 'other': { 'preferGlobal': True, 'config': { 'publishtest': False } }, 'name': 'npm', 'keywords': [ 'install', 'modules', 'package manager', 'package.json' ], 'url': 'https://docs.npmjs.com/' }, 'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' }, { 'indexer_configuration_id': 30, 'translated_metadata': None, 'id': '02fb2c89e14f7fab46701478c83779c7beb7b069' }] # The assertion below returns False sometimes because of nested lists self.assertEqual(expected_results, results) - @istest def test_detect_metadata_package_json(self): # given df = [{ 'sha1_git': b'abc', 'name': b'index.js', 'target': b'abc', 'length': 897, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'dir_a', 'sha1': b'bcd' }, { 'sha1_git': b'aab', 'name': b'package.json', 'target': b'aab', 'length': 712, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'dir_a', 'sha1': b'cde' }] # when results = detect_metadata(df) expected_results = { 'npm': [ b'cde' ] } # then self.assertEqual(expected_results, results) - @istest def test_revision_metadata_indexer(self): metadata_indexer = TestRevisionMetadataIndexer() sha1_gits = [ b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', ] metadata_indexer.run(sha1_gits, 'update-dups') results = metadata_indexer.idx_storage.state expected_results = [{ 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'translated_metadata': { 'identifier': None, 'maintainer': None, 'url': [ 'https://github.com/librariesio/yarn-parser#readme' ], 'codeRepository': [{ 'type': 'git', 'url': 'git+https://github.com/librariesio/yarn-parser.git' }], 'author': ['Andrew Nesbitt'], 'license': ['AGPL-3.0'], 'version': ['1.0.0'], 'description': [ 'Tiny web service for parsing yarn.lock files' ], 'relatedLink': None, 'developmentStatus': None, 'operatingSystem': None, 'issueTracker': [{ 'url': 'https://github.com/librariesio/yarn-parser/issues' }], 'softwareRequirements': [{ 'express': '^4.14.0', 'yarn': '^0.21.0', 'body-parser': '^1.15.2' }], 'name': ['yarn-parser'], 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], 'email': None }, 'indexer_configuration_id': 7 }] # then self.assertEqual(expected_results, results) diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py index 899780e..feac898 100644 --- a/swh/indexer/tests/test_mimetype.py +++ b/swh/indexer/tests/test_mimetype.py @@ -1,158 +1,153 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import logging -from nose.tools import istest from swh.indexer.mimetype import ContentMimetypeIndexer from swh.indexer.tests.test_utils import MockObjStorage class _MockIndexerStorage(): """Mock storage to simplify reading indexers' outputs. """ def content_mimetype_add(self, mimetypes, conflict_update=None): self.state = mimetypes self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 10, }] class TestMimetypeIndexer(ContentMimetypeIndexer): """Specific mimetype whose configuration is enough to satisfy the indexing tests. """ def prepare(self): self.config = { 'destination_task': None, 'rescheduling_task': None, 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" }, }, } self.idx_storage = _MockIndexerStorage() self.log = logging.getLogger('swh.indexer') self.objstorage = MockObjStorage() self.destination_task = None self.rescheduling_task = self.config['rescheduling_task'] self.destination_task = self.config['destination_task'] self.tools = self.register_tools(self.config['tools']) self.tool = self.tools[0] class TestMimetypeIndexerUnknownToolStorage(TestMimetypeIndexer): """Specific mimetype whose configuration is not enough to satisfy the indexing tests. """ def prepare(self): super().prepare() self.tools = None class TestMimetypeIndexerWithErrors(unittest.TestCase): - @istest - def wrong_unknown_configuration_tool(self): + def test_wrong_unknown_configuration_tool(self): """Indexer with unknown configuration tool should fail the check""" with self.assertRaisesRegex(ValueError, 'Tools None is unknown'): TestMimetypeIndexerUnknownToolStorage() class TestMimetypeIndexerTest(unittest.TestCase): def setUp(self): self.indexer = TestMimetypeIndexer() - @istest def test_index_no_update(self): # given sha1s = [ '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', '688a5ef812c53907562fe379d4b3851e69c7cb15', ] # when self.indexer.run(sha1s, policy_update='ignore-dups') # then expected_results = [{ 'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, { 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }] self.assertFalse(self.indexer.idx_storage.conflict_update) self.assertEquals(expected_results, self.indexer.idx_storage.state) - @istest def test_index_update(self): # given sha1s = [ '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', '688a5ef812c53907562fe379d4b3851e69c7cb15', 'da39a3ee5e6b4b0d3255bfef95601890afd80709', # empty content ] # when self.indexer.run(sha1s, policy_update='update-dups') # then expected_results = [{ 'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb5', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, { 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }, { 'id': 'da39a3ee5e6b4b0d3255bfef95601890afd80709', 'indexer_configuration_id': 10, 'mimetype': b'application/x-empty', 'encoding': b'binary', }] self.assertTrue(self.indexer.idx_storage.conflict_update) self.assertEquals(expected_results, self.indexer.idx_storage.state) - @istest def test_index_one_unknown_sha1(self): # given sha1s = ['688a5ef812c53907562fe379d4b3851e69c7cb15', '799a5ef812c53907562fe379d4b3851e69c7cb15', # unknown '800a5ef812c53907562fe379d4b3851e69c7cb15'] # unknown # when self.indexer.run(sha1s, policy_update='update-dups') # then expected_results = [{ 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15', 'indexer_configuration_id': 10, 'mimetype': b'text/plain', 'encoding': b'us-ascii', }] self.assertTrue(self.indexer.idx_storage.conflict_update) self.assertEquals(expected_results, self.indexer.idx_storage.state) diff --git a/swh/indexer/tests/test_orchestrator.py b/swh/indexer/tests/test_orchestrator.py index 68fb3c1..4810dac 100644 --- a/swh/indexer/tests/test_orchestrator.py +++ b/swh/indexer/tests/test_orchestrator.py @@ -1,163 +1,160 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest -from nose.tools import istest from swh.indexer.orchestrator import BaseOrchestratorIndexer from swh.indexer.indexer import RevisionIndexer from swh.indexer.tests.test_utils import MockIndexerStorage from swh.scheduler.task import Task class BaseTestIndexer(RevisionIndexer): ADDITIONAL_CONFIG = { 'tools': ('dict', { 'name': 'foo', 'version': 'bar', 'configuration': {} }), } def prepare(self): self.idx_storage = MockIndexerStorage() def check(self): pass def filter(self, ids): self.filtered = ids return ids def index(self, ids): self.indexed = ids return [id_ + '_indexed_by_' + self.__class__.__name__ for id_ in ids] def persist_index_computations(self, result, policy_update): self.persisted = result class Indexer1(BaseTestIndexer): def filter(self, ids): return super().filter([id_ for id_ in ids if '1' in id_]) class Indexer2(BaseTestIndexer): def filter(self, ids): return super().filter([id_ for id_ in ids if '2' in id_]) class Indexer3(BaseTestIndexer): def filter(self, ids): return super().filter([id_ for id_ in ids if '3' in id_]) class Indexer1Task(Task): pass class Indexer2Task(Task): pass class Indexer3Task(Task): pass class TestOrchestrator12(BaseOrchestratorIndexer): TASK_NAMES = { 'indexer1': 'swh.indexer.tests.test_orchestrator.Indexer1Task', 'indexer2': 'swh.indexer.tests.test_orchestrator.Indexer2Task', 'indexer3': 'swh.indexer.tests.test_orchestrator.Indexer3Task', } INDEXER_CLASSES = { 'indexer1': 'swh.indexer.tests.test_orchestrator.Indexer1', 'indexer2': 'swh.indexer.tests.test_orchestrator.Indexer2', 'indexer3': 'swh.indexer.tests.test_orchestrator.Indexer3', } def __init__(self): super().__init__() self.running_tasks = [] def prepare(self): self.config = { 'indexers': { 'indexer1': { 'batch_size': 2, 'check_presence': True, }, 'indexer2': { 'batch_size': 2, 'check_presence': True, }, } } self.prepare_tasks() def _run_tasks(self, celery_tasks): self.running_tasks.extend(celery_tasks) class OrchestratorTest(unittest.TestCase): maxDiff = None - @istest - def orchestrator_filter(self): + def test_orchestrator_filter(self): o = TestOrchestrator12() o.prepare() o.run(['id12', 'id2']) self.assertCountEqual(o.running_tasks, [ {'args': (), 'chord_size': None, 'immutable': False, 'kwargs': {'ids': ['id12'], 'policy_update': 'ignore-dups'}, 'options': {}, 'subtask_type': None, 'task': 'swh.indexer.tests.test_orchestrator.Indexer1Task'}, {'args': (), 'chord_size': None, 'immutable': False, 'kwargs': {'ids': ['id12', 'id2'], 'policy_update': 'ignore-dups'}, 'options': {}, 'subtask_type': None, 'task': 'swh.indexer.tests.test_orchestrator.Indexer2Task'}, ]) - @istest - def orchestrator_batch(self): + def test_orchestrator_batch(self): o = TestOrchestrator12() o.prepare() o.run(['id12', 'id2a', 'id2b', 'id2c']) self.assertCountEqual(o.running_tasks, [ {'args': (), 'chord_size': None, 'immutable': False, 'kwargs': {'ids': ['id12'], 'policy_update': 'ignore-dups'}, 'options': {}, 'subtask_type': None, 'task': 'swh.indexer.tests.test_orchestrator.Indexer1Task'}, {'args': (), 'chord_size': None, 'immutable': False, 'kwargs': {'ids': ['id12', 'id2a'], 'policy_update': 'ignore-dups'}, 'options': {}, 'subtask_type': None, 'task': 'swh.indexer.tests.test_orchestrator.Indexer2Task'}, {'args': (), 'chord_size': None, 'immutable': False, 'kwargs': {'ids': ['id2b', 'id2c'], 'policy_update': 'ignore-dups'}, 'options': {}, 'subtask_type': None, 'task': 'swh.indexer.tests.test_orchestrator.Indexer2Task'}, ]) diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py index 41c9068..a1090d2 100644 --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -1,261 +1,260 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.objstorage.exc import ObjNotFoundError class MockObjStorage: """Mock an swh-objstorage objstorage with predefined contents. """ data = {} def __init__(self): self.data = { '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text', '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text', '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text', '02fb2c89e14f7fab46701478c83779c7beb7b069': b""" import unittest import logging - from nose.tools import istest from swh.indexer.mimetype import ContentMimetypeIndexer from swh.indexer.tests.test_utils import MockObjStorage class MockStorage(): def content_mimetype_add(self, mimetypes): self.state = mimetypes self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 10, }] """, '103bc087db1d26afc3a0283f38663d081e9b01e6': b""" #ifndef __AVL__ #define __AVL__ typedef struct _avl_tree avl_tree; typedef struct _data_t { int content; } data_t; """, '93666f74f1cf635c8c8ac118879da6ec5623c410': b""" (should 'pygments (recognize 'lisp 'easily)) """, '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """, 'd4c647f0fc257591cc9ba1722484229780d1c607': b""" { "version": "5.0.3", "name": "npm", "description": "a package manager for JavaScript", "keywords": [ "install", "modules", "package manager", "package.json" ], "preferGlobal": true, "config": { "publishtest": false }, "homepage": "https://docs.npmjs.com/", "author": "Isaac Z. Schlueter (http://blog.izs.me)", "repository": { "type": "git", "url": "https://github.com/npm/npm" }, "bugs": { "url": "https://github.com/npm/npm/issues" }, "dependencies": { "JSONStream": "~1.3.1", "abbrev": "~1.1.0", "ansi-regex": "~2.1.1", "ansicolors": "~0.3.2", "ansistyles": "~0.1.3" }, "devDependencies": { "tacks": "~1.2.6", "tap": "~10.3.2" }, "license": "Artistic-2.0" } """, 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" """, 'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', } def __iter__(self): yield from self.data.keys() def __contains__(self, sha1): return self.data.get(sha1) is not None def get(self, sha1): raw_content = self.data.get(sha1) if raw_content is None: raise ObjNotFoundError(sha1) return raw_content class MockIndexerStorage(): """Mock an swh-indexer storage. """ def indexer_configuration_add(self, tools): tool = tools[0] if tool['tool_name'] == 'swh-metadata-translator': return [{ 'id': 30, 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] elif tool['tool_name'] == 'swh-metadata-detector': return [{ 'id': 7, 'tool_name': 'swh-metadata-detector', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] def content_metadata_missing(self, sha1s): yield from [] def content_metadata_add(self, metadata, conflict_update=None): self.state = metadata self.conflict_update = conflict_update def revision_metadata_add(self, metadata, conflict_update=None): self.state = metadata self.conflict_update = conflict_update def content_metadata_get(self, sha1s): return [{ 'tool': { 'configuration': { 'type': 'local', 'context': 'npm' }, 'version': '0.0.1', 'id': 6, 'name': 'swh-metadata-translator' }, 'id': b'cde', 'translated_metadata': { 'issueTracker': { 'url': 'https://github.com/librariesio/yarn-parser/issues' }, 'version': '1.0.0', 'name': 'yarn-parser', 'author': 'Andrew Nesbitt', 'url': 'https://github.com/librariesio/yarn-parser#readme', 'processorRequirements': {'node': '7.5'}, 'other': { 'scripts': { 'start': 'node index.js' }, 'main': 'index.js' }, 'license': 'AGPL-3.0', 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], 'codeRepository': { 'type': 'git', 'url': 'git+https://github.com/librariesio/yarn-parser.git' }, 'description': 'Tiny web service for parsing yarn.lock files', 'softwareRequirements': { 'yarn': '^0.21.0', 'express': '^4.14.0', 'body-parser': '^1.15.2'} } }] class MockStorage(): """Mock a real swh-storage storage to simplify reading indexers' outputs. """ def revision_get(self, revisions): return [{ 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'committer': { 'id': 26, 'name': b'Andrew Nesbitt', 'fullname': b'Andrew Nesbitt ', 'email': b'andrewnez@gmail.com' }, 'synthetic': False, 'date': { 'negative_utc': False, 'timestamp': { 'seconds': 1487596456, 'microseconds': 0 }, 'offset': 0 }, 'directory': b'10' }] def directory_ls(self, directory, recursive=False, cur=None): # with directory: b'\x9d', return [{ 'sha1_git': b'abc', 'name': b'index.js', 'target': b'abc', 'length': 897, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'bcd' }, { 'sha1_git': b'aab', 'name': b'package.json', 'target': b'aab', 'length': 712, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'cde' }, { 'dir_id': b'10', 'target': b'11', 'type': 'dir', 'length': None, 'name': b'.github', 'sha1': None, 'perms': 16384, 'sha1_git': None, 'status': None, 'sha256': None }]