diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -660,6 +660,19 @@ cur) db.revision_metadata_add_from_temp(conflict_update, cur) + @remote_api_endpoint('revision_metadata/delete') + @db_transaction() + def revision_metadata_delete(self, entries, db=None, cur=None): + """Remove revision metadata from the storage. + + Args: + entries (list of dict): dictionaries with the following keys: + - **id** (bytes): revision identifier + - **indexer_configuration_id** (int): tool used to compute + metadata + """ + db.revision_metadata_delete(entries, cur) + @remote_api_endpoint('origin_intrinsic_metadata') @db_transaction_generator() def origin_intrinsic_metadata_get(self, ids, db=None, cur=None): @@ -714,6 +727,20 @@ cur) db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur) + @remote_api_endpoint('origin_intrinsic_metadata/delete') + @db_transaction() + def origin_intrinsic_metadata_delete( + self, entries, db=None, cur=None): + """Remove origin metadata from the storage. 
+ + Args: + entries (list of dict): dictionaries with the following keys: + - **origin_id** (int): origin identifier + - **indexer_configuration_id** (int): tool used to compute + metadata + """ + db.origin_intrinsic_metadata_delete(entries, cur) + @remote_api_endpoint('origin_intrinsic_metadata/search/fulltext') @db_transaction_generator() def origin_intrinsic_metadata_search_fulltext( diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py --- a/swh/indexer/storage/db.py +++ b/swh/indexer/storage/db.py @@ -317,6 +317,21 @@ self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)", (conflict_update, )) + def revision_metadata_delete( + self, entries, cur=None): + """Delete rows whose (id, indexer_configuration_id) is in entries.""" + entries = list(entries) # iterated twice below + if not entries: + # "IN (VALUES )" with no row is invalid SQL; nothing to delete. + return + cur = self._cursor(cur) + cur.execute( + "DELETE from revision_metadata " + "WHERE (id, indexer_configuration_id) IN " + " (VALUES %s)" % (', '.join('%s' for _ in entries)), + tuple((e['id'], e['indexer_configuration_id']) + for e in entries),) + def revision_metadata_get_from_list(self, ids, cur=None): yield from self._get_from_list( 'revision_metadata', ids, self.revision_metadata_cols, cur=cur) @@ -342,6 +357,21 @@ "SELECT swh_origin_intrinsic_metadata_add(%s)", (conflict_update, )) + def origin_intrinsic_metadata_delete( + self, entries, cur=None): + """Delete rows whose (origin_id, tool id) pair is in entries.""" + entries = list(entries) # iterated twice below + if not entries: + # "IN (VALUES )" with no row is invalid SQL; nothing to delete. + return + cur = self._cursor(cur) + cur.execute( + "DELETE from origin_intrinsic_metadata " + "WHERE (origin_id, indexer_configuration_id) IN" + " (VALUES %s)" % (', '.join('%s' for _ in entries)), + tuple((e['origin_id'], e['indexer_configuration_id']) + for e in entries),) + def origin_intrinsic_metadata_get_from_list(self, orig_ids, cur=None): yield from self._get_from_list( 'origin_intrinsic_metadata', orig_ids, diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py --- a/swh/indexer/storage/in_memory.py +++ b/swh/indexer/storage/in_memory.py @@ -176,6 +176,15 @@ if id_ not in self._sorted_ids: bisect.insort(self._sorted_ids, id_) + def delete(self, entries): + for entry in entries: + (id_, tool_id) = (entry['id'], 
entry['indexer_configuration_id']) + key = (id_, tool_id) + if tool_id in self._tools_per_id[id_]: + self._tools_per_id[id_].remove(tool_id) + if key in self._data: + del self._data[key] + class IndexerStorage: """In-memory SWH indexer storage.""" @@ -582,6 +591,17 @@ raise TypeError('identifiers must be bytes.') self._revision_metadata.add(metadata, conflict_update) + def revision_metadata_delete(self, entries): + """Remove revision metadata from the storage. + + Args: + entries (list of dict): dictionaries with the following keys: + - **id** (bytes): revision identifier + - **indexer_configuration_id** (int): tool used to compute + metadata + """ + self._revision_metadata.delete(entries) + def origin_intrinsic_metadata_get(self, ids): """Retrieve origin metadata per id. @@ -629,6 +649,22 @@ items.append(item) self._origin_intrinsic_metadata.add(items, conflict_update) + def origin_intrinsic_metadata_delete(self, entries): + """Remove origin metadata from the storage. + + Args: + entries (list of dict): dictionaries with the following keys: + - **origin_id** (int): origin identifier + - **indexer_configuration_id** (int): tool used to compute + metadata + """ + items = [] + for entry in entries: + item = entry.copy() + item['id'] = item.pop('origin_id') + items.append(item) + self._origin_intrinsic_metadata.delete(items) + def origin_intrinsic_metadata_search_fulltext( self, conjunction, limit=100): """Returns the list of origins whose metadata contain all the terms. 
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -293,12 +293,52 @@ self.assertEqual(actual_data, expected_data) + @rename + def delete(self): + # given + tool_id = self.tools[tool_name]['id'] + + query = [self.sha1_2, self.sha1_1] + + data1 = { + 'id': self.sha1_2, + **example_data1, + 'indexer_configuration_id': tool_id, + } + + # when: the entry is added, then deleted + endpoint(self, 'add')([data1]) + endpoint(self, 'delete')([ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + } + ]) + + # then + actual_data = list(endpoint(self, 'get')(query)) + + # then: the deleted entry is no longer returned + self.assertEqual(actual_data, []) + + @rename + def delete_nonexisting(self): # nothing added beforehand; must not raise + tool_id = self.tools[tool_name]['id'] + endpoint(self, 'delete')([ + { + 'id': self.sha1_2, + 'indexer_configuration_id': tool_id, + } + ]) + return ( missing, add__drop_duplicate, add__update_in_place_duplicate, add__duplicate_twice, get, + delete, + delete_nonexisting, ) @@ -345,6 +385,8 @@ test_content_mimetype_add__update_in_place_duplicate, test_content_mimetype_add__duplicate_twice, test_content_mimetype_get, + _, # test_content_mimetype_delete, + _, # test_content_mimetype_delete_nonexisting, ) = gen_generic_endpoint_tests( endpoint_type='content_mimetype', tool_name='file', @@ -365,6 +407,8 @@ test_content_language_add__update_in_place_duplicate, test_content_language_add__duplicate_twice, test_content_language_get, + _, # test_content_language_delete, + _, # test_content_language_delete_nonexisting, ) = gen_generic_endpoint_tests( endpoint_type='content_language', tool_name='pygments', @@ -384,6 +428,8 @@ _, # test_content_ctags_add__update_in_place_duplicate, _, # test_content_ctags_add__duplicate_twice, _, # test_content_ctags_get, + _, # test_content_ctags_delete, + _, # test_content_ctags_delete_nonexisting, ) = gen_generic_endpoint_tests( endpoint_type='content_ctags', tool_name='universal-ctags', 
@@ -724,6 +770,8 @@ _, # test_content_fossology_license_add__update_in_place_duplicate, _, # test_content_metadata_add__duplicate_twice, _, # test_content_fossology_license_get, + _, # test_content_fossology_license_delete, + _, # test_content_fossology_license_delete_nonexisting, ) = gen_generic_endpoint_tests( endpoint_type='content_fossology_license', tool_name='nomos', @@ -792,6 +840,8 @@ test_content_metadata_add__update_in_place_duplicate, test_content_metadata_add__duplicate_twice, test_content_metadata_get, + _, # test_content_metadata_delete, + _, # test_content_metadata_delete_nonexisting, ) = gen_generic_endpoint_tests( endpoint_type='content_metadata', tool_name='swh-metadata-detector', @@ -823,6 +873,8 @@ test_revision_metadata_add__update_in_place_duplicate, test_revision_metadata_add__duplicate_twice, test_revision_metadata_get, + test_revision_metadata_delete, + test_revision_metadata_delete_nonexisting, ) = gen_generic_endpoint_tests( endpoint_type='revision_metadata', tool_name='swh-metadata-detector', @@ -889,6 +941,53 @@ self.assertEqual(actual_metadata, expected_metadata) + def test_origin_intrinsic_metadata_delete(self): + # given: one revision metadata entry and one origin entry from it + tool_id = self.tools['swh-metadata-detector']['id'] + + metadata = { + 'version': None, + 'name': None, + } + metadata_rev = { + 'id': self.revision_id_2, + 'translated_metadata': metadata, + 'mappings': ['mapping1'], + 'indexer_configuration_id': tool_id, + } + metadata_origin = { + 'origin_id': self.origin_id_1, + 'metadata': metadata, + 'indexer_configuration_id': tool_id, + 'mappings': ['mapping1'], + 'from_revision': self.revision_id_2, + } + + # when: both are added, then the origin entry is deleted + self.storage.revision_metadata_add([metadata_rev]) + self.storage.origin_intrinsic_metadata_add([metadata_origin]) + self.storage.origin_intrinsic_metadata_delete([ + { + 'origin_id': self.origin_id_1, + 'indexer_configuration_id': tool_id + } + ]) + + # then: no origin metadata is returned any more + actual_metadata = list(self.storage.origin_intrinsic_metadata_get( + [self.origin_id_1, 42])) + + 
self.assertEqual(actual_metadata, []) + + def test_origin_intrinsic_metadata_delete_nonexisting(self): + tool_id = self.tools['swh-metadata-detector']['id'] + self.storage.origin_intrinsic_metadata_delete([ + { + 'origin_id': self.origin_id_1, + 'indexer_configuration_id': tool_id + } + ]) # nothing added beforehand; must not raise + def test_origin_intrinsic_metadata_add_drop_duplicate(self): # given tool_id = self.tools['swh-metadata-detector']['id']