diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -660,6 +660,17 @@
                                      cur)
         db.revision_metadata_add_from_temp(conflict_update, cur)
 
+    @remote_api_endpoint('revision_metadata/delete')
+    @db_transaction()
+    def revision_metadata_delete(self, revision, tool_id, db=None, cur=None):
+        """Remove revision metadata from the storage.
+
+        Args:
+            revision: sha1_git of revision
+            tool_id: tool used to compute metadata
+        """
+        db.revision_metadata_delete(revision, tool_id, cur)
+
     @remote_api_endpoint('origin_intrinsic_metadata')
     @db_transaction_generator()
     def origin_intrinsic_metadata_get(self, ids, db=None, cur=None):
@@ -714,6 +725,18 @@
                                      cur)
         db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
 
+    @remote_api_endpoint('origin_intrinsic_metadata/delete')
+    @db_transaction()
+    def origin_intrinsic_metadata_delete(
+            self, origin, tool_id, db=None, cur=None):
+        """Remove origin metadata from the storage.
+
+        Args:
+            origin: origin identifier
+            tool_id: tool used to compute metadata
+        """
+        db.origin_intrinsic_metadata_delete(origin, tool_id, cur)
+
     @remote_api_endpoint('origin_intrinsic_metadata/search/fulltext')
     @db_transaction_generator()
     def origin_intrinsic_metadata_search_fulltext(
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -317,6 +317,14 @@
         self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)",
                                   (conflict_update, ))
 
+    def revision_metadata_delete(
+            self, rev_id, tool_id, cur=None):
+        cur = self._cursor(cur)
+        cur.execute(
+            "DELETE from revision_metadata "
+            "WHERE id = %s AND indexer_configuration_id = %s",
+            (rev_id, tool_id))
+
     def revision_metadata_get_from_list(self, ids, cur=None):
         yield from self._get_from_list(
             'revision_metadata', ids, self.revision_metadata_cols, cur=cur)
@@ -342,6 +350,14 @@
             "SELECT swh_origin_intrinsic_metadata_add(%s)",
             (conflict_update, ))
 
+    def origin_intrinsic_metadata_delete(
+            self, origin_id, tool_id, cur=None):
+        cur = self._cursor(cur)
+        cur.execute(
+            "DELETE from origin_intrinsic_metadata "
+            "WHERE origin_id = %s AND indexer_configuration_id = %s",
+            (origin_id, tool_id))
+
     def origin_intrinsic_metadata_get_from_list(self, orig_ids, cur=None):
         yield from self._get_from_list(
             'origin_intrinsic_metadata', orig_ids,
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -176,6 +176,13 @@
             if id_ not in self._sorted_ids:
                 bisect.insort(self._sorted_ids, id_)
 
+    def delete(self, id_, tool_id):
+        key = (id_, tool_id)
+        if tool_id in self._tools_per_id[id_]:
+            self._tools_per_id[id_].remove(tool_id)
+        if key in self._data:
+            del self._data[key]
+
 
 class IndexerStorage:
     """In-memory SWH indexer storage."""
@@ -582,6 +589,15 @@
             raise TypeError('identifiers must be bytes.')
         self._revision_metadata.add(metadata, conflict_update)
 
+    def revision_metadata_delete(self, revision, tool_id):
+        """Remove revision metadata from the storage.
+
+        Args:
+            revision: sha1_git of revision
+            tool_id: tool used to compute metadata
+        """
+        self._revision_metadata.delete(revision, tool_id)
+
     def origin_intrinsic_metadata_get(self, ids):
         """Retrieve origin metadata per id.
@@ -629,6 +645,15 @@
                 items.append(item)
         self._origin_intrinsic_metadata.add(items, conflict_update)
 
+    def origin_intrinsic_metadata_delete(self, origin, tool_id):
+        """Remove origin metadata from the storage.
+
+        Args:
+            origin: origin identifier
+            tool_id: tool used to compute metadata
+        """
+        self._origin_intrinsic_metadata.delete(origin, tool_id)
+
     def origin_intrinsic_metadata_search_fulltext(
             self, conjunction, limit=100):
         """Returns the list of origins whose metadata contain all the terms.
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -293,12 +293,42 @@
 
         self.assertEqual(actual_data, expected_data)
 
+    @rename
+    def delete(self):
+        # given
+        tool_id = self.tools[tool_name]['id']
+
+        query = [self.sha1_2, self.sha1_1]
+
+        data1 = {
+            'id': self.sha1_2,
+            **example_data1,
+            'indexer_configuration_id': tool_id,
+        }
+
+        # when
+        endpoint(self, 'add')([data1])
+        endpoint(self, 'delete')(self.sha1_2, tool_id)
+
+        # then
+        actual_data = list(endpoint(self, 'get')(query))
+
+        # then
+        self.assertEqual(actual_data, [])
+
+    @rename
+    def delete_nonexisting(self):
+        tool_id = self.tools[tool_name]['id']
+        endpoint(self, 'delete')(self.sha1_2, tool_id)
+
     return (
         missing,
         add__drop_duplicate,
         add__update_in_place_duplicate,
         add__duplicate_twice,
         get,
+        delete,
+        delete_nonexisting,
     )
@@ -345,6 +375,8 @@
         test_content_mimetype_add__update_in_place_duplicate,
         test_content_mimetype_add__duplicate_twice,
         test_content_mimetype_get,
+        _,  # test_content_mimetype_delete,
+        _,  # test_content_mimetype_delete_nonexisting,
     ) = gen_generic_endpoint_tests(
         endpoint_type='content_mimetype',
         tool_name='file',
@@ -365,6 +397,8 @@
         test_content_language_add__update_in_place_duplicate,
         test_content_language_add__duplicate_twice,
         test_content_language_get,
+        _,  # test_content_language_delete,
+        _,  # test_content_language_delete_nonexisting,
     ) = gen_generic_endpoint_tests(
         endpoint_type='content_language',
         tool_name='pygments',
@@ -384,6 +418,8 @@
         _,  # test_content_ctags_add__update_in_place_duplicate,
         _,  # test_content_ctags_add__duplicate_twice,
         _,  # test_content_ctags_get,
+        _,  # test_content_ctags_delete,
+        _,  # test_content_ctags_delete_nonexisting,
     ) = gen_generic_endpoint_tests(
         endpoint_type='content_ctags',
         tool_name='universal-ctags',
@@ -724,6 +760,8 @@
         _,  # test_content_fossology_license_add__update_in_place_duplicate,
         _,  # test_content_metadata_add__duplicate_twice,
         _,  # test_content_fossology_license_get,
+        _,  # test_content_fossology_license_delete,
+        _,  # test_content_fossology_license_delete_nonexisting,
     ) = gen_generic_endpoint_tests(
         endpoint_type='content_fossology_license',
         tool_name='nomos',
@@ -792,6 +830,8 @@
         test_content_metadata_add__update_in_place_duplicate,
         test_content_metadata_add__duplicate_twice,
         test_content_metadata_get,
+        _,  # test_content_metadata_delete,
+        _,  # test_content_metadata_delete_nonexisting,
     ) = gen_generic_endpoint_tests(
         endpoint_type='content_metadata',
         tool_name='swh-metadata-detector',
@@ -823,6 +863,8 @@
         test_revision_metadata_add__update_in_place_duplicate,
         test_revision_metadata_add__duplicate_twice,
         test_revision_metadata_get,
+        test_revision_metadata_delete,
+        test_revision_metadata_delete_nonexisting,
     ) = gen_generic_endpoint_tests(
         endpoint_type='revision_metadata',
         tool_name='swh-metadata-detector',
@@ -889,6 +931,45 @@
 
         self.assertEqual(actual_metadata, expected_metadata)
 
+    def test_origin_intrinsic_metadata_delete(self):
+        # given
+        tool_id = self.tools['swh-metadata-detector']['id']
+
+        metadata = {
+            'version': None,
+            'name': None,
+        }
+        metadata_rev = {
+            'id': self.revision_id_2,
+            'translated_metadata': metadata,
+            'mappings': ['mapping1'],
+            'indexer_configuration_id': tool_id,
+        }
+        metadata_origin = {
+            'origin_id': self.origin_id_1,
+            'metadata': metadata,
+            'indexer_configuration_id': tool_id,
+            'mappings': ['mapping1'],
+            'from_revision': self.revision_id_2,
+        }
+
+        # when
+        self.storage.revision_metadata_add([metadata_rev])
+        self.storage.origin_intrinsic_metadata_add([metadata_origin])
+        self.storage.origin_intrinsic_metadata_delete(
+            self.origin_id_1, tool_id)
+
+        # then
+        actual_metadata = list(self.storage.origin_intrinsic_metadata_get(
+            [self.origin_id_1, 42]))
+
+        self.assertEqual(actual_metadata, [])
+
+    def test_origin_intrinsic_metadata_delete_nonexisting(self):
+        tool_id = self.tools['swh-metadata-detector']['id']
+        self.storage.origin_intrinsic_metadata_delete(
+            self.origin_id_1, tool_id)
+
     def test_origin_intrinsic_metadata_add_drop_duplicate(self):
         # given
         tool_id = self.tools['swh-metadata-detector']['id']