diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -461,8 +461,7 @@ dict with the indexing task status """ - status = "uneventful" - summary: Dict[str, Any] = {} + summary: Dict[str, Any] = {"status": "uneventful"} count = 0 try: if skip_existing: @@ -476,23 +475,22 @@ for contents in utils.grouper(gen, n=self.config["write_batch_size"]): res = self.persist_index_computations( - contents, policy_update="update-dups" + list(contents), policy_update="update-dups" ) if not count_object_added_key: count_object_added_key = list(res.keys())[0] count += res[count_object_added_key] if count > 0: - status = "eventful" + summary["status"] = "eventful" except Exception: if not self.catch_exceptions: raise self.log.exception("Problem when computing metadata.") - status = "failed" - finally: - summary["status"] = status - if count > 0 and count_object_added_key: - summary[count_object_added_key] = count - return summary + summary["status"] = "failed" + + if count > 0 and count_object_added_key: + summary[count_object_added_key] = count + return summary class OriginIndexer(BaseIndexer): @@ -522,17 +520,22 @@ **kwargs: passed to the `index` method """ - summary: Dict[str, Any] = {} - status = "uneventful" - results = self.index_list(origin_urls, **kwargs) + summary: Dict[str, Any] = {"status": "uneventful"} + try: + results = self.index_list(origin_urls, **kwargs) + except Exception: + if not self.catch_exceptions: + raise + summary["status"] = "failed" + return summary + summary_persist = self.persist_index_computations(results, policy_update) self.results = results if summary_persist: for value in summary_persist.values(): if value > 0: - status = "eventful" + summary["status"] = "eventful" summary.update(summary_persist) - summary["status"] = status return summary def index_list(self, origins: List[Any], **kwargs: Any) -> List[Dict]: @@ -543,9 +546,8 @@ if res: # If no results, skip it results.append(res) except Exception: - if not self.catch_exceptions: - raise - self.log.exception("Problem when processing origin %s", origin["id"]) + self.log.exception("Problem when processing origin %s", origin) + raise return results @@ -573,8 +575,7 @@ respectively update duplicates or ignore them """ - summary: Dict[str, Any] = {} - status = "uneventful" + summary: Dict[str, Any] = {"status": "uneventful"} results = [] revision_ids = [ @@ -595,13 +596,14 @@ if not self.catch_exceptions: raise self.log.exception("Problem when processing revision") - status = "failed" + summary["status"] = "failed" + return summary + summary_persist = self.persist_index_computations(results, policy_update) if summary_persist: for value in summary_persist.values(): if value > 0: - status = "eventful" + summary["status"] = "eventful" summary.update(summary_persist) self.results = results - summary["status"] = status return summary diff --git a/swh/indexer/tests/test_indexer.py b/swh/indexer/tests/test_indexer.py --- a/swh/indexer/tests/test_indexer.py +++ b/swh/indexer/tests/test_indexer.py @@ -4,37 +4,106 @@ # See top-level LICENSE file for more information from typing import Any, Dict, Optional, Union +from unittest.mock import Mock import pytest -from swh.indexer.indexer import ContentIndexer +from swh.indexer.indexer import ( + ContentIndexer, + ContentPartitionIndexer, + OriginIndexer, + RevisionIndexer, +) +from swh.indexer.storage import PagedResult, Sha1 from swh.model.model import Revision from .utils import BASE_TEST_CONFIG -class TestException(Exception): +class _TestException(Exception): pass -class CrashingIndexer(ContentIndexer): +class CrashingIndexerMixin: USE_TOOLS = False def index( self, id: Union[bytes, Dict, Revision], data: Optional[bytes] = None, **kwargs ) -> Dict[str, Any]: - pass + raise _TestException() def persist_index_computations(self, results, policy_update) -> Dict[str, int]: - raise TestException() + return {} + def indexed_contents_in_partition( + self, partition_id: int, nb_partitions: int, page_token: Optional[str] = None + ) -> PagedResult[Sha1]: + raise _TestException() -def test_catch_exceptions(): - indexer = CrashingIndexer(config=BASE_TEST_CONFIG) + +class CrashingContentIndexer(CrashingIndexerMixin, ContentIndexer): + pass + + +class CrashingContentPartitionIndexer(CrashingIndexerMixin, ContentPartitionIndexer): + pass + + +class CrashingRevisionIndexer(CrashingIndexerMixin, RevisionIndexer): + pass + + +class CrashingOriginIndexer(CrashingIndexerMixin, OriginIndexer): + pass + + +def test_content_indexer_catch_exceptions(): + indexer = CrashingContentIndexer(config=BASE_TEST_CONFIG) + indexer.objstorage = Mock() + indexer.objstorage.get.return_value = b"content" assert indexer.run([b"foo"], policy_update=True) == {"status": "failed"} indexer.catch_exceptions = False - with pytest.raises(TestException): + with pytest.raises(_TestException): indexer.run([b"foo"], policy_update=True) + + +def test_revision_indexer_catch_exceptions(): + indexer = CrashingRevisionIndexer(config=BASE_TEST_CONFIG) + indexer.storage = Mock() + indexer.storage.revision_get.return_value = ["rev"] + + assert indexer.run([b"foo"], policy_update=True) == {"status": "failed"} + + indexer.catch_exceptions = False + + with pytest.raises(_TestException): + indexer.run([b"foo"], policy_update=True) + + +def test_origin_indexer_catch_exceptions(): + indexer = CrashingOriginIndexer(config=BASE_TEST_CONFIG) + + assert indexer.run(["http://example.org"], policy_update=True) == { + "status": "failed" + } + + indexer.catch_exceptions = False + + with pytest.raises(_TestException): + indexer.run(["http://example.org"], policy_update=True) + + +def test_content_partition_indexer_catch_exceptions(): + indexer = CrashingContentPartitionIndexer( + config={**BASE_TEST_CONFIG, "write_batch_size": 42} + ) + + assert indexer.run(0, 42, policy_update=True) == {"status": "failed"} + + indexer.catch_exceptions = False + + with pytest.raises(_TestException): + indexer.run(0, 42, policy_update=True) diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py --- a/swh/indexer/tests/utils.py +++ b/swh/indexer/tests/utils.py @@ -749,13 +749,13 @@ """ partition_id = 0 - nb_partitions = 4 + nb_partitions = 1 actual_results = self.indexer.run( partition_id, nb_partitions, skip_existing=False ) - assert actual_results == {"status": "uneventful"} # why? + assert actual_results["status"] == "eventful", actual_results def test_generate_content_get_no_result(self): """No result indexed returns False"""