diff --git a/swh/icinga_plugins/cli.py b/swh/icinga_plugins/cli.py
--- a/swh/icinga_plugins/cli.py
+++ b/swh/icinga_plugins/cli.py
@@ -56,6 +56,42 @@
     sys.exit(VaultCheck(ctx.obj).main())
 
 
+@icinga_cli_group.group(name="check-savecodenow")
+@click.option(
+    "--swh-storage-url", type=str, required=True, help="URL to an swh-storage HTTP API"
+)
+@click.option(
+    "--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
+)
+@click.option(
+    "--poll-interval",
+    type=int,
+    default=10,
+    help="Interval (in seconds) between two polls to the API, "
+    "to check for save code now status.",
+)
+@click.pass_context
+def check_scn(ctx, **kwargs):
+    """Checks for the save code now feature."""
+    # Keep the group options in the context object for the subcommands.
+    ctx.obj.update(kwargs)
+
+
+@check_scn.command(name="origin")
+@click.argument("origin", type=str)
+@click.option("--visit-type", type=str, required=True, help="Visit type for origin")
+@click.pass_context
+def check_scn_origin(ctx, origin, visit_type):
+    """Requests a save code now via the API for a given origin with type visit_type,
+    waits for its completion, reports the approximate time of completion (failed or
+    succeeded) and warns if a threshold is exceeded.
+
+    """
+    from .save_code_now import SaveCodeNowCheck
+
+    sys.exit(SaveCodeNowCheck(ctx.obj, origin, visit_type).main())
+
+
 @icinga_cli_group.group(name="check-deposit")
 @click.option(
     "--server",
diff --git a/swh/icinga_plugins/save_code_now.py b/swh/icinga_plugins/save_code_now.py
new file mode 100644
--- /dev/null
+++ b/swh/icinga_plugins/save_code_now.py
@@ -0,0 +1,132 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import time
+from typing import Dict, List
+
+import requests
+
+from swh.storage import get_storage
+
+from .base_check import BaseCheck
+
+
+class SaveCodeNowCheck(BaseCheck):
+    """Icinga check submitting a save code now request and timing its completion."""
+
+    TYPE = "SAVECODENOW"
+    DEFAULT_WARNING_THRESHOLD = 60
+    DEFAULT_CRITICAL_THRESHOLD = 120
+
+    def __init__(self, obj: Dict, origin: str, visit_type: str) -> None:
+        super().__init__(obj)
+        self.storage = get_storage("remote", url=obj["swh_storage_url"])
+        self.api_url = obj["swh_web_url"]
+        self.poll_interval = obj["poll_interval"]
+        self.origin = origin
+        self.visit_type = visit_type
+
+    @staticmethod
+    def _check_response(response: requests.Response) -> None:
+        """Raise AssertionError unless the API answered with HTTP 200.
+
+        An explicit raise is used rather than ``assert`` so the check is still
+        performed when Python runs with ``-O``.
+
+        """
+        if response.status_code != 200:
+            raise AssertionError((response, response.text))
+
+    def main(self) -> int:
+        """Main check scenario:
+
+        1. Post a save code now request for ``self.origin`` with visit type
+           ``self.visit_type``.
+        2. While the request is pending, poll its status every
+           ``self.poll_interval`` seconds.
+        3. Print the outcome along with the approximate elapsed time.
+
+        Returns:
+            the code given by ``get_status`` when the request succeeded; 2 when it
+            failed, ended with an unknown status, or when the critical threshold
+            was exceeded while polling.
+
+        Raises:
+            AssertionError: the API did not answer with HTTP 200, or the submitted
+                request is missing from the polled results.
+
+        """
+        start_time: float = time.time()
+        total_time: float = 0.0
+        scn_url = (
+            f"{self.api_url}/api/1/origin/save/{self.visit_type}/url/{self.origin}/"
+        )
+        response = requests.post(scn_url)
+        self._check_response(response)
+
+        result: Dict = response.json()
+
+        status_key = "save_task_status"
+        request_date = result["save_request_date"]
+
+        origin_info = (self.visit_type, self.origin)
+
+        while result[status_key] in ("not yet scheduled", "running", "scheduled"):
+            time.sleep(self.poll_interval)
+            response = requests.get(scn_url)
+            self._check_response(response)
+            raw_result: List[Dict] = response.json()
+            if not raw_result:
+                raise AssertionError(f"Unexpected result: {raw_result}")
+
+            # The API can return several entries for the same origin: only keep
+            # the one matching the request submitted above.
+            found = next(
+                (r for r in raw_result if r["save_request_date"] == request_date),
+                None,
+            )
+            if found is None:
+                raise AssertionError(
+                    f"No save request dated {request_date} in: {raw_result}"
+                )
+            result = found
+
+            total_time = time.time() - start_time
+
+            if total_time > self.critical_threshold:
+                self.print_result(
+                    "CRITICAL",
+                    f"Save code now request for {origin_info} took more than "
+                    f"{total_time:.2f}s and has status: "
+                    f"{result[status_key]}",
+                    total_time=total_time,
+                )
+                return 2
+
+        if result[status_key] == "succeeded":
+            (status_code, status) = self.get_status(total_time)
+            self.print_result(
+                status,
+                f"Save code now request for {origin_info} took {total_time:.2f}s "
+                f"and succeeded.",
+                total_time=total_time,
+            )
+            return status_code
+        elif result[status_key] == "failed":
+            self.print_result(
+                "CRITICAL",
+                f"Save code now request for {origin_info} took {total_time:.2f}s "
+                f"and failed",
+                total_time=total_time,
+            )
+            return 2
+        else:
+            self.print_result(
+                "CRITICAL",
+                f"Save code now request for {origin_info} took {total_time:.2f}s "
+                f"and resulted in unknown status: {result[status_key]}",
+                total_time=total_time,
+            )
+            return 2
diff --git a/swh/icinga_plugins/tests/test_save_code_now.py b/swh/icinga_plugins/tests/test_save_code_now.py
new file mode 100644
--- /dev/null
+++ b/swh/icinga_plugins/tests/test_save_code_now.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information