Changeset View
Changeset View
Standalone View
Standalone View
swh/icinga_plugins/vault.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import time | import time | ||||
import requests | import requests | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from .base_check import BaseCheck | from .base_check import BaseCheck | ||||
class NoDirectory(Exception): | class NoDirectory(Exception): | ||||
pass | pass | ||||
class VaultCheck(BaseCheck): | class VaultCheck(BaseCheck): | ||||
TYPE = "VAULT" | TYPE = "VAULT" | ||||
DEFAULT_WARNING_THRESHOLD = 0 | DEFAULT_WARNING_THRESHOLD = 0 | ||||
DEFAULT_CRITICAL_THRESHOLD = 3600 | DEFAULT_CRITICAL_THRESHOLD = 3600 | ||||
def __init__(self, obj): | def __init__(self, obj): | ||||
super().__init__(obj) | super().__init__(obj, application="vault") | ||||
self._swh_storage = get_storage("remote", url=obj["swh_storage_url"]) | self._swh_storage = get_storage("remote", url=obj["swh_storage_url"]) | ||||
self._swh_web_url = obj["swh_web_url"] | self._swh_web_url = obj["swh_web_url"] | ||||
self._poll_interval = obj["poll_interval"] | self._poll_interval = obj["poll_interval"] | ||||
self.register_prometheus_gauge("status", "") | |||||
self.register_prometheus_gauge("duration", "seconds", ["step", "status"]) | |||||
def _url_for_dir(self, dir_id): | def _url_for_dir(self, dir_id): | ||||
return self._swh_web_url + f"/api/1/vault/directory/{dir_id.hex()}/" | return self._swh_web_url + f"/api/1/vault/directory/{dir_id.hex()}/" | ||||
def _pick_directory(self): | def _pick_directory(self): | ||||
dir_ = self._swh_storage.directory_get_random() | dir_ = self._swh_storage.directory_get_random() | ||||
if dir_ is None: | if dir_ is None: | ||||
raise NoDirectory() | raise NoDirectory() | ||||
return dir_ | return dir_ | ||||
Show All 28 Lines | def main(self): | ||||
if total_time > self.critical_threshold: | if total_time > self.critical_threshold: | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f"cooking directory {dir_id.hex()} took more than " | f"cooking directory {dir_id.hex()} took more than " | ||||
f"{total_time:.2f}s and has status: " | f"{total_time:.2f}s and has status: " | ||||
f'{result["progress_message"]}', | f'{result["progress_message"]}', | ||||
total_time=total_time, | total_time=total_time, | ||||
) | ) | ||||
self.collect_prometheus_metric("status", 2) | |||||
self.collect_prometheus_metric( | |||||
"duration", total_time, ["cooking", "timeout"], | |||||
) | |||||
return 2 | return 2 | ||||
if result["status"] == "done": | exit_code = 0 | ||||
(status_code, status) = self.get_status(total_time) | status = result["status"] | ||||
prometheus_status = status | |||||
if status == "done": | |||||
(exit_code, state) = self.get_status(total_time) | |||||
self.print_result( | self.print_result( | ||||
status, | state, | ||||
f"cooking directory {dir_id.hex()} took {total_time:.2f}s " | f"cooking directory {dir_id.hex()} took {total_time:.2f}s " | ||||
f"and succeeded.", | f"and succeeded.", | ||||
total_time=total_time, | total_time=total_time, | ||||
) | ) | ||||
return status_code | |||||
elif result["status"] == "failed": | elif status == "failed": | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f"cooking directory {dir_id.hex()} took {total_time:.2f}s " | f"cooking directory {dir_id.hex()} took {total_time:.2f}s " | ||||
f'and failed with: {result["progress_message"]}', | f'and failed with: {result["progress_message"]}', | ||||
total_time=total_time, | total_time=total_time, | ||||
) | ) | ||||
return 2 | |||||
exit_code = 2 | |||||
else: | else: | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f"cooking directory {dir_id.hex()} took {total_time:.2f}s " | f"cooking directory {dir_id.hex()} took {total_time:.2f}s " | ||||
f'and resulted in unknown status: {result["status"]}', | f"and resulted in unknown status: {status}", | ||||
total_time=total_time, | total_time=total_time, | ||||
) | ) | ||||
return 2 | |||||
prometheus_status = "unknown" | |||||
exit_code = 2 | |||||
self.collect_prometheus_metric("status", exit_code) | |||||
self.collect_prometheus_metric( | |||||
"duration", total_time, ["end", prometheus_status], | |||||
) | |||||
return exit_code |