class BaseCheck:
    """Shared threshold handling and result printing for checks.

    This class reads, but does not define, the class attributes ``TYPE``,
    ``DEFAULT_WARNING_THRESHOLD`` and ``DEFAULT_CRITICAL_THRESHOLD``;
    subclasses have to provide them.
    """

    def __init__(self, obj):
        """Initialise both thresholds from ``obj``.

        ``obj`` must offer a dict-like ``get``. The entries
        ``_warning_threshold`` and ``_critical_threshold`` are used when
        present; otherwise the class-level defaults apply.
        """
        lookup = obj.get
        self.warning_threshold = lookup(
            '_warning_threshold', self.DEFAULT_WARNING_THRESHOLD)
        self.critical_threshold = lookup(
            '_critical_threshold', self.DEFAULT_CRITICAL_THRESHOLD)

    def get_status(self, value):
        """Classify ``value`` against the configured thresholds.

        Returns a ``(code, label)`` pair: ``(2, 'CRITICAL')``,
        ``(1, 'WARNING')`` or ``(0, 'OK')``. A falsy threshold (``0`` or
        ``None``) never triggers its level.
        """
        critical = self.critical_threshold
        if critical and value >= critical:
            return (2, 'CRITICAL')

        warning = self.warning_threshold
        if warning and value >= warning:
            return (1, 'WARNING')

        return (0, 'OK')

    def print_result(self, status_type, status_string, **metrics):
        """Print the status line, then one line per metric.

        Metrics are printed in ascending name order, each formatted with two
        decimals and an ``s`` suffix.
        """
        print(f'{self.TYPE} {status_type} - {status_string}')
        # Keyword names are unique, so ordering by name alone yields the
        # same order as sorting the (name, value) pairs.
        for metric_name in sorted(metrics):
            print(f"| '{metric_name}' = {metrics[metric_name]:.2f}s")
"directory", "status": "done" } response_failed = { "obj_id": dir_id, "obj_type": "directory", "progress_message": "foobar", "status": "failed" } class FakeStorage: def __init__(self, foo, **kwargs): pass def directory_get_random(self): return bytes.fromhex(dir_id) def invoke(args, catch_exceptions=False): runner = CliRunner() result = runner.invoke(cli, args) if not catch_exceptions and result.exception: print(result.output) raise result.exception return result def test_vault_immediate_success(requests_mock, mocker): scenario = WebScenario() url = f'mock://swh-web.example.org/api/1/vault/directory/{dir_id}/' scenario.add_step('get', url, {}, status_code=404) scenario.add_step('post', url, response_pending) scenario.add_step('get', url, response_done) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch('swh.icinga_plugins.vault.get_storage') get_storage_mock.side_effect = FakeStorage sleep_mock = mocker.patch('time.sleep') result = invoke([ '--swh-web-url', 'mock://swh-web.example.org', '--swh-storage-url', 'foo://example.org', 'check-vault', 'directory', ]) assert re.match( rf'^VAULT OK - cooking directory {dir_id} took ' r'[0-9]\.[0-9]{2}s and succeeded.\n' - r"\| 'total time' = [0-9]\.[0-9]{2}s$", + r"\| 'total_time' = [0-9]\.[0-9]{2}s$", result.output) assert result.exit_code == 0, result.output sleep_mock.assert_called_once_with(10) def test_vault_delayed_success(requests_mock, mocker): scenario = WebScenario() url = f'mock://swh-web.example.org/api/1/vault/directory/{dir_id}/' scenario.add_step('get', url, {}, status_code=404) scenario.add_step('post', url, response_pending) scenario.add_step('get', url, response_pending) scenario.add_step('get', url, response_done) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch('swh.icinga_plugins.vault.get_storage') get_storage_mock.side_effect = FakeStorage sleep_mock = mocker.patch('time.sleep') result = invoke([ '--swh-web-url', 'mock://swh-web.example.org', '--swh-storage-url', 
'foo://example.org', 'check-vault', 'directory', ]) assert re.match( rf'^VAULT OK - cooking directory {dir_id} took ' r'[0-9]\.[0-9]{2}s and succeeded.\n' - r"\| 'total time' = [0-9]\.[0-9]{2}s$", + r"\| 'total_time' = [0-9]\.[0-9]{2}s$", result.output) assert result.exit_code == 0, result.output assert sleep_mock.call_count == 2 def test_vault_failure(requests_mock, mocker): scenario = WebScenario() url = f'mock://swh-web.example.org/api/1/vault/directory/{dir_id}/' scenario.add_step('get', url, {}, status_code=404) scenario.add_step('post', url, response_pending) scenario.add_step('get', url, response_failed) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch('swh.icinga_plugins.vault.get_storage') get_storage_mock.side_effect = FakeStorage sleep_mock = mocker.patch('time.sleep') result = invoke([ '--swh-web-url', 'mock://swh-web.example.org', '--swh-storage-url', 'foo://example.org', 'check-vault', 'directory', ], catch_exceptions=True) assert re.match( rf'^VAULT CRITICAL - cooking directory {dir_id} took ' r'[0-9]\.[0-9]{2}s and failed with: foobar\n' - r"\| 'total time' = [0-9]\.[0-9]{2}s\n$", + r"\| 'total_time' = [0-9]\.[0-9]{2}s\n$", result.output) assert result.exit_code == 2, result.output sleep_mock.assert_called_once_with(10) def test_vault_timeout(requests_mock, mocker): time_offset = 0 def increment_time(): nonlocal time_offset time_offset += 4000 scenario = WebScenario() url = f'mock://swh-web.example.org/api/1/vault/directory/{dir_id}/' scenario.add_step('get', url, {}, status_code=404) scenario.add_step('post', url, response_pending) scenario.add_step('get', url, response_pending) scenario.add_step('get', url, response_pending, callback=increment_time) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch('swh.icinga_plugins.vault.get_storage') get_storage_mock.side_effect = FakeStorage sleep_mock = mocker.patch('time.sleep') real_time = time.time mocker.patch( 'time.time', side_effect=lambda: real_time() + 
class NoDirectory(Exception):
    """Raised when the storage has no directory to hand out."""
    pass


class VaultCheck(BaseCheck):
    """Check that asks the vault to cook a directory and times the cooking.

    The default warning threshold of 0 is falsy, so ``get_status`` never
    reports WARNING unless a warning threshold is configured.
    """
    TYPE = 'VAULT'
    DEFAULT_WARNING_THRESHOLD = 0
    DEFAULT_CRITICAL_THRESHOLD = 3600

    def __init__(self, obj):
        """Set up the check from ``obj``.

        Besides the thresholds read by ``BaseCheck``, ``obj`` must provide
        the keys ``swh_storage_url``, ``swh_web_url`` and ``poll_interval``.
        """
        super().__init__(obj)
        self._swh_storage = get_storage('remote', url=obj['swh_storage_url'])
        self._swh_web_url = obj['swh_web_url']
        self._poll_interval = obj['poll_interval']

    def _url_for_dir(self, dir_id):
        """Return the vault API URL for ``dir_id`` (bytes, hex-encoded)."""
        return self._swh_web_url + f'/api/1/vault/directory/{dir_id.hex()}/'

    def _pick_directory(self):
        """Return a random directory id from the storage.

        Raises:
            NoDirectory: if the storage returns ``None``.
        """
        dir_ = self._swh_storage.directory_get_random()
        if dir_ is None:
            raise NoDirectory()
        return dir_

    def _pick_uncached_directory(self):
        """Return a random directory id for which the vault answers 404.

        NOTE(review): there is no retry limit; this keeps looping for as
        long as every picked directory yields a non-404 response.
        """
        while True:
            dir_id = self._pick_directory()
            response = requests.get(self._url_for_dir(dir_id))
            if response.status_code == 404:
                return dir_id

    @staticmethod
    def _require_ok(response):
        """Raise ``AssertionError`` unless ``response`` has status 200.

        This is an explicit check rather than an ``assert`` statement so
        that it still runs under ``python -O``; the exception type and
        payload are the ones the ``assert`` used to produce.
        """
        if response.status_code != 200:
            raise AssertionError((response, response.text))

    def main(self):
        """Cook one uncached directory and print the outcome.

        Returns the exit status: the code computed by ``get_status`` from
        the elapsed time when cooking succeeds, and 2 in every other case
        (no directory available, threshold exceeded while polling, cooking
        failed, unknown status).
        """
        try:
            dir_id = self._pick_uncached_directory()
        except NoDirectory:
            self.print_result(
                'CRITICAL',
                'No directory exists in the archive')
            return 2

        url = self._url_for_dir(dir_id)
        cooking = f'cooking directory {dir_id.hex()} took'

        start_time = time.time()
        # Stays at 0 when the POST response is already in a final state.
        total_time = 0
        response = requests.post(url)
        self._require_ok(response)
        result = response.json()

        while result['status'] in ('new', 'pending'):
            time.sleep(self._poll_interval)
            response = requests.get(url)
            self._require_ok(response)
            result = response.json()

            total_time = time.time() - start_time

            if total_time > self.critical_threshold:
                # The payload fetched just above may no longer be a pending
                # one, so do not assume 'progress_message' is present.
                self.print_result(
                    'CRITICAL',
                    f'{cooking} more than {total_time:.2f}s '
                    f'and has status: {result.get("progress_message")}',
                    total_time=total_time)
                return 2

        if result['status'] == 'done':
            (status_code, status) = self.get_status(total_time)
            self.print_result(
                status,
                f'{cooking} {total_time:.2f}s and succeeded.',
                total_time=total_time)
            return status_code
        elif result['status'] == 'failed':
            self.print_result(
                'CRITICAL',
                f'{cooking} {total_time:.2f}s '
                f'and failed with: {result["progress_message"]}',
                total_time=total_time)
            return 2
        else:
            self.print_result(
                'CRITICAL',
                f'{cooking} {total_time:.2f}s '
                f'and resulted in unknown status: {result["status"]}',
                total_time=total_time)
            return 2