diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -336,6 +336,8 @@ except Exception: if not self.catch_exceptions: raise + self.log.exception("Problem when reading contents metadata.") + sentry_sdk.capture_exception() summary["status"] = "failed" return summary @@ -613,6 +615,7 @@ except Exception: if not self.catch_exceptions: raise + summary["status"] = "failed" return summary @@ -631,6 +634,8 @@ try: results.extend(self.index(origin.url, **kwargs)) except Exception: + if not self.catch_exceptions: + raise self.log.exception("Problem when processing origin %s", origin.url) sentry_sdk.capture_exception() raise diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py --- a/swh/indexer/tests/conftest.py +++ b/swh/indexer/tests/conftest.py @@ -11,6 +11,7 @@ import pytest from pytest_postgresql import factories +import sentry_sdk import yaml from swh.core.db.pytest_plugin import initialize_database_for_module @@ -130,3 +131,40 @@ f.write(yaml.dump(swh_indexer_config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conffile) return conffile + + +@pytest.fixture +def sentry_init(): + # Inspired by + # https://github.com/getsentry/sentry-python/blob/1.5.9/tests/conftest.py#L168-L184 + + initialized = False + + def inner(*a, **kw): + nonlocal initialized + assert not initialized, "already initialized" + initialized = True + hub = sentry_sdk.Hub.current + client = sentry_sdk.Client(*a, **kw) + hub.bind_client(client) + client.transport = TestTransport() + + class TestTransport: + def __init__(self): + self.events = [] + self.envelopes = [] + + def capture_event(self, event): + self.events.append(event) + + def capture_envelope(self, envelope): + self.envelopes.append(envelope) + + with sentry_sdk.Hub(None): + yield inner + + +@pytest.fixture +def sentry_events(monkeypatch, sentry_init): + sentry_init() + return sentry_sdk.Hub.current.client.transport.events diff --git a/swh/indexer/tests/test_indexer.py b/swh/indexer/tests/test_indexer.py --- a/swh/indexer/tests/test_indexer.py +++ b/swh/indexer/tests/test_indexer.py @@ -73,55 +73,101 @@ return {"nb_added": len(results)} -def test_content_indexer_catch_exceptions(): +def check_sentry(sentry_events): + assert len(sentry_events) == 1 + sentry_event = sentry_events.pop() + assert "'_TestException'" in str(sentry_event) + + +def test_content_indexer_catch_exceptions(sentry_events): indexer = CrashingContentIndexer(config=BASE_TEST_CONFIG) indexer.objstorage = Mock() indexer.objstorage.get.return_value = b"content" + indexer.objstorage.get_batch.return_value = [b"content"] - assert indexer.run([b"foo"]) == {"status": "failed"} + sha1 = b"\x12" * 20 + + # As task, catching exceptions + assert indexer.run([sha1]) == {"status": "failed"} + check_sentry(sentry_events) + + # As journal client, catching exceptions + assert indexer.process_journal_objects({"content": [{"sha1": sha1}]}) == { + "status": "failed" + } + check_sentry(sentry_events) indexer.catch_exceptions = False + # As task, not catching exceptions with pytest.raises(_TestException): - indexer.run([b"foo"]) + indexer.run([sha1]) + assert sentry_events == [] + + # As journal client, not catching exceptions + with pytest.raises(_TestException): + assert indexer.process_journal_objects({"content": [{"sha1": sha1}]}) == { + "status": "failed" + } + assert sentry_events == [] -def test_directory_indexer_catch_exceptions(): +def test_directory_indexer_catch_exceptions(sentry_events): indexer = CrashingDirectoryIndexer(config=BASE_TEST_CONFIG) indexer.storage = Mock() indexer.storage.directory_get.return_value = [DIRECTORY2] - assert indexer.run([b"foo"]) == {"status": "failed"} + sha1 = DIRECTORY2.id + + # As task, catching exceptions + assert indexer.run([sha1]) == {"status": "failed"} + check_sentry(sentry_events) + # As journal client, catching exceptions assert indexer.process_journal_objects({"directory": [DIRECTORY2.to_dict()]}) == { "status": "failed" } + check_sentry(sentry_events) indexer.catch_exceptions = False + # As task, not catching exceptions with pytest.raises(_TestException): indexer.run([b"foo"]) + assert sentry_events == [] + # As journal client, not catching exceptions with pytest.raises(_TestException): indexer.process_journal_objects({"directory": [DIRECTORY2.to_dict()]}) + assert sentry_events == [] -def test_origin_indexer_catch_exceptions(): +def test_origin_indexer_catch_exceptions(sentry_events): indexer = CrashingOriginIndexer(config=BASE_TEST_CONFIG) - assert indexer.run(["http://example.org"]) == {"status": "failed"} + origin_url = "http://example.org" - assert indexer.process_journal_objects( - {"origin": [{"url": "http://example.org"}]} - ) == {"status": "failed"} + # As task, catching exceptions + assert indexer.run([origin_url]) == {"status": "failed"} + check_sentry(sentry_events) + + # As journal client, catching exceptions + assert indexer.process_journal_objects({"origin": [{"url": origin_url}]}) == { + "status": "failed" + } + check_sentry(sentry_events) indexer.catch_exceptions = False + # As task, not catching exceptions with pytest.raises(_TestException): - indexer.run(["http://example.org"]) + indexer.run([origin_url]) + assert sentry_events == [] + # As journal client, not catching exceptions with pytest.raises(_TestException): - indexer.process_journal_objects({"origin": [{"url": "http://example.org"}]}) + indexer.process_journal_objects({"origin": [{"url": origin_url}]}) + assert sentry_events == [] def test_content_partition_indexer_catch_exceptions(): diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -259,15 +259,17 @@ assert orig_results == [] -def test_origin_metadata_indexer_error( +def test_origin_metadata_indexer_directory_error( swh_indexer_config, idx_storage: IndexerStorageInterface, storage: StorageInterface, obj_storage, + sentry_events, ) -> None: indexer = OriginMetadataIndexer(config=swh_indexer_config) origin = "https://github.com/librariesio/yarn-parser" + with patch( "swh.indexer.metadata.DirectoryMetadataIndexer" ".translate_directory_intrinsic_metadata", @@ -275,6 +277,43 @@ ): indexer.run([origin]) + assert len(sentry_events) == 1 + sentry_event = sentry_events.pop() + assert "'TypeError'" in str(sentry_event) + + dir_id = DIRECTORY2.id + + dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id])) + assert dir_results == [] + + orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) + assert orig_results == [] + + +def test_origin_metadata_indexer_content_exception( + swh_indexer_config, + idx_storage: IndexerStorageInterface, + storage: StorageInterface, + obj_storage, + sentry_events, +) -> None: + + indexer = OriginMetadataIndexer(config=swh_indexer_config) + origin = "https://github.com/librariesio/yarn-parser" + + class TestException(Exception): + pass + + with patch( + "swh.indexer.metadata.ContentMetadataRow", + side_effect=TestException(), + ): + indexer.run([origin]) + + assert len(sentry_events) == 1 + sentry_event = sentry_events.pop() + assert ".TestException'" in str(sentry_event), sentry_event + dir_id = DIRECTORY2.id dir_results = list(indexer.idx_storage.directory_intrinsic_metadata_get([dir_id]))