diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py --- a/swh/indexer/tests/test_ctags.py +++ b/swh/indexer/tests/test_ctags.py @@ -13,7 +13,8 @@ from swh.indexer.tests.test_utils import ( CommonContentIndexerTest, CommonIndexerWithErrorsTest, CommonIndexerNoTool, - SHA1_TO_CTAGS, NoDiskIndexer, BASE_TEST_CONFIG + SHA1_TO_CTAGS, NoDiskIndexer, BASE_TEST_CONFIG, + fill_storage, fill_obj_storage ) @@ -108,33 +109,39 @@ """ + legacy_get_format = True + def get_indexer_results(self, ids): yield from self.idx_storage.content_ctags_get(ids) def setUp(self): self.indexer = CtagsIndexerTest() self.idx_storage = self.indexer.idx_storage + fill_storage(self.indexer.storage) + fill_obj_storage(self.indexer.objstorage) # Prepare test input self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = 'd4c647f0fc257591cc9ba1722484229780d1c607' self.id2 = '688a5ef812c53907562fe379d4b3851e69c7cb15' - tool_id = self.indexer.tool['id'] + tool = {k.replace('tool_', ''): v + for (k, v) in self.indexer.tool.items()} + self.expected_results = { self.id0: { 'id': self.id0, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'ctags': SHA1_TO_CTAGS[self.id0], }, self.id1: { 'id': self.id1, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'ctags': SHA1_TO_CTAGS[self.id1], }, self.id2: { 'id': self.id2, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'ctags': SHA1_TO_CTAGS[self.id2], } } diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py --- a/swh/indexer/tests/test_fossology_license.py +++ b/swh/indexer/tests/test_fossology_license.py @@ -86,32 +86,33 @@ """ def get_indexer_results(self, ids): - yield from self.idx_storage.content_ctags_get(ids) + yield from self.idx_storage.content_fossology_license_get(ids) def setUp(self): super().setUp() self.indexer = FossologyLicenseTestIndexer() self.idx_storage = self.indexer.idx_storage + fill_storage(self.indexer.storage) + fill_obj_storage(self.indexer.objstorage) self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15' self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' # empty content - tool_id = self.indexer.tool['id'] + + tool = {k.replace('tool_', ''): v + for (k, v) in self.indexer.tool.items()} # then self.expected_results = { self.id0: { - 'id': self.id0, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'licenses': SHA1_TO_LICENSES[self.id0], }, self.id1: { - 'id': self.id1, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'licenses': SHA1_TO_LICENSES[self.id1], }, self.id2: { - 'id': self.id2, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'licenses': SHA1_TO_LICENSES[self.id2], } } diff --git a/swh/indexer/tests/test_language.py b/swh/indexer/tests/test_language.py --- a/swh/indexer/tests/test_language.py +++ b/swh/indexer/tests/test_language.py @@ -55,6 +55,8 @@ """ + legacy_get_format = True + def get_indexer_results(self, ids): yield from self.indexer.idx_storage.content_language_get(ids) @@ -66,22 +68,24 @@ self.id0 = '02fb2c89e14f7fab46701478c83779c7beb7b069' self.id1 = '103bc087db1d26afc3a0283f38663d081e9b01e6' self.id2 = 'd4c647f0fc257591cc9ba1722484229780d1c607' - tool_id = self.indexer.tool['id'] + + tool = {k.replace('tool_', ''): v + for (k, v) in self.indexer.tool.items()} self.expected_results = { self.id0: { 'id': self.id0, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'lang': 'python', }, self.id1: { 'id': self.id1, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'lang': 'c' }, self.id2: { 'id': self.id2, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'lang': 'text-only' } } diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py --- a/swh/indexer/tests/test_mimetype.py +++ b/swh/indexer/tests/test_mimetype.py @@ -67,6 +67,7 @@ - Unknown sha1 in the input list are not indexed """ + legacy_get_format = True def get_indexer_results(self, ids): yield from self.idx_storage.content_mimetype_get(ids) @@ -74,27 +75,32 @@ def setUp(self): self.indexer = MimetypeTestIndexer() self.idx_storage = self.indexer.idx_storage + fill_storage(self.indexer.storage) + fill_obj_storage(self.indexer.objstorage) self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15' self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' - tool_id = self.indexer.tool['id'] + + tool = {k.replace('tool_', ''): v + for (k, v) in self.indexer.tool.items()} + self.expected_results = { self.id0: { 'id': self.id0, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'mimetype': 'text/plain', 'encoding': 'us-ascii', }, self.id1: { 'id': self.id1, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'mimetype': 'text/plain', 'encoding': 'us-ascii', }, self.id2: { 'id': self.id2, - 'indexer_configuration_id': tool_id, + 'tool': tool, 'mimetype': 'application/x-empty', 'encoding': 'binary', } diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import abc import datetime import hashlib import random @@ -474,24 +475,53 @@ self.RangeIndexer() -class CommonContentIndexerTest: +class CommonContentIndexerTest(metaclass=abc.ABCMeta): + legacy_get_format = False + """True iff the tested indexer uses the legacy format. + see: https://forge.softwareheritage.org/T1433""" + def get_indexer_results(self, ids): """Override this for indexers that don't have a mock storage.""" return self.indexer.idx_storage.state - def assert_results_ok(self, sha1s, expected_results=None): + def assert_legacy_results_ok(self, sha1s, expected_results=None): + # XXX old format, remove this when all endpoints are + # updated to the new one + # see: https://forge.softwareheritage.org/T1433 sha1s = [sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) for sha1 in sha1s] - actual_results = self.get_indexer_results(sha1s) + actual_results = list(self.get_indexer_results(sha1s)) if expected_results is None: expected_results = self.expected_results + self.assertEqual(len(expected_results), len(actual_results), + (expected_results, actual_results)) for indexed_data in actual_results: _id = indexed_data['id'] - self.assertEqual(indexed_data, expected_results[_id]) - _tool_id = indexed_data['indexer_configuration_id'] - self.assertEqual(_tool_id, self.indexer.tool['id']) + expected_data = expected_results[hashutil.hash_to_hex(_id)].copy() + expected_data['id'] = _id + self.assertEqual(indexed_data, expected_data) + + def assert_results_ok(self, sha1s, expected_results=None): + if self.legacy_get_format: + self.assert_legacy_results_ok(sha1s, expected_results) + return + + sha1s = [sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) + for sha1 in sha1s] + actual_results = list(self.get_indexer_results(sha1s)) + + if expected_results is None: + expected_results = self.expected_results + + self.assertEqual(len(expected_results), len(actual_results), + (expected_results, actual_results)) + for indexed_data in actual_results: + (_id, indexed_data) = list(indexed_data.items())[0] + expected_data = expected_results[hashutil.hash_to_hex(_id)].copy() + expected_data = [expected_data] + self.assertEqual(indexed_data, expected_data) def test_index(self): """Known sha1 have their data indexed