diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -743,7 +743,7 @@ @db_transaction_generator() def origin_intrinsic_metadata_search_by_producer( self, start=0, end=None, limit=100, ids_only=False, - mappings=None, + mappings=None, tool_ids=None, db=None, cur=None): """Returns the list of origins whose metadata contain all the terms. @@ -768,7 +768,7 @@ """ res = db.origin_intrinsic_metadata_search_by_producer( - start, end, limit, ids_only, mappings, cur) + start, end, limit, ids_only, mappings, tool_ids, cur) if ids_only: for (origin_id,) in res: yield origin_id diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py --- a/swh/indexer/storage/db.py +++ b/swh/indexer/storage/db.py @@ -368,7 +368,7 @@ yield from cur def origin_intrinsic_metadata_search_by_producer( - self, start, end, limit, ids_only, mappings, cur): + self, start, end, limit, ids_only, mappings, tool_ids, cur): if ids_only: keys = 'oim.origin_id' else: @@ -392,6 +392,9 @@ if mappings is not None: where.append('oim.mappings && %s') args.append(mappings) + if tool_ids is not None: + where.append('oim.indexer_configuration_id = ANY(%s)') + args.append(tool_ids) if where: query_parts.append('WHERE') query_parts.append(' AND '.join(where)) diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py --- a/swh/indexer/storage/in_memory.py +++ b/swh/indexer/storage/in_memory.py @@ -682,7 +682,7 @@ def origin_intrinsic_metadata_search_by_producer( self, start=0, end=None, limit=100, ids_only=False, - mappings=None, + mappings=None, tool_ids=None, db=None, cur=None): """Returns the list of origins whose metadata contain all the terms. @@ -709,6 +709,8 @@ nb_results = 0 if mappings is not None: mappings = frozenset(mappings) + if tool_ids is not None: + tool_ids = frozenset(tool_ids) for entry in self._origin_intrinsic_metadata.get_all(): if entry['id'] < start or (end and entry['id'] > end): continue @@ -716,6 +718,8 @@ return if mappings is not None and mappings.isdisjoint(entry['mappings']): continue + if tool_ids is not None and entry['tool']['id'] not in tool_ids: + continue if ids_only: yield entry['id'] else: diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -39,6 +39,12 @@ "type": "local", "context": ["NpmMapping", "CodemetaMapping"]}, }, { + 'tool_name': 'swh-metadata-detector2', + 'tool_version': '0.0.1', + 'tool_configuration': { + "type": "local", "context": ["NpmMapping", "CodemetaMapping"]}, + }, + { 'tool_name': 'file', 'tool_version': '5.22', 'tool_configuration': {"command_line": "file --mime "}, @@ -1174,7 +1180,8 @@ [self.origin_id_1]) def _fill_origin_intrinsic_metadata(self): - tool_id = self.tools['swh-metadata-detector']['id'] + tool1_id = self.tools['swh-metadata-detector']['id'] + tool2_id = self.tools['swh-metadata-detector2']['id'] metadata1 = { '@context': 'foo', @@ -1184,13 +1191,13 @@ 'id': self.revision_id_1, 'translated_metadata': metadata1, 'mappings': ['npm'], - 'indexer_configuration_id': tool_id, + 'indexer_configuration_id': tool1_id, } metadata1_origin = { 'origin_id': self.origin_id_1, 'metadata': metadata1, 'mappings': ['npm'], - 'indexer_configuration_id': tool_id, + 'indexer_configuration_id': tool1_id, 'from_revision': self.revision_id_1, } metadata2 = { @@ -1201,13 +1208,13 @@ 'id': self.revision_id_2, 'translated_metadata': metadata2, 'mappings': ['npm', 'gemspec'], - 'indexer_configuration_id': tool_id, + 'indexer_configuration_id': tool2_id, } metadata2_origin = { 'origin_id': self.origin_id_2, 'metadata': metadata2, 'mappings': ['npm', 'gemspec'], - 'indexer_configuration_id': tool_id, + 'indexer_configuration_id': tool2_id, 'from_revision': self.revision_id_2, } metadata3 = { @@ -1217,13 +1224,13 @@ 'id': self.revision_id_3, 'translated_metadata': metadata3, 'mappings': ['npm', 'gemspec'], - 'indexer_configuration_id': tool_id, + 'indexer_configuration_id': tool2_id, } metadata3_origin = { 'origin_id': self.origin_id_3, 'metadata': metadata3, 'mappings': ['pkg-info'], - 'indexer_configuration_id': tool_id, + 'indexer_configuration_id': tool2_id, 'from_revision': self.revision_id_3, } @@ -1236,7 +1243,8 @@ def test_origin_intrinsic_metadata_search_by_producer(self): self._fill_origin_intrinsic_metadata() - tool = self.tools['swh-metadata-detector'] + tool1 = self.tools['swh-metadata-detector'] + tool2 = self.tools['swh-metadata-detector2'] endpoint = self.storage.origin_intrinsic_metadata_search_by_producer # test pagination @@ -1279,6 +1287,17 @@ endpoint(mappings=['npm'], limit=1, ids_only=True), [self.origin_id_1]) + # test tool filtering + self.assertCountEqual( + endpoint(tool_ids=[tool1['id']], ids_only=True), + [self.origin_id_1]) + self.assertCountEqual( + endpoint(tool_ids=[tool2['id']], ids_only=True), + [self.origin_id_2, self.origin_id_3]) + self.assertCountEqual( + endpoint(tool_ids=[tool1['id'], tool2['id']], ids_only=True), + [self.origin_id_1, self.origin_id_2, self.origin_id_3]) + # test ids_only=False self.assertEqual(list(endpoint(mappings=['gemspec'])), [{ 'origin_id': self.origin_id_2, @@ -1287,7 +1306,7 @@ 'author': 'Jane Doe', }, 'mappings': ['npm', 'gemspec'], - 'tool': tool, + 'tool': tool2, 'from_revision': self.revision_id_2, }])