diff --git a/swh/icinga_plugins/cli.py b/swh/icinga_plugins/cli.py
--- a/swh/icinga_plugins/cli.py
+++ b/swh/icinga_plugins/cli.py
@@ -56,6 +56,38 @@
     sys.exit(VaultCheck(ctx.obj).main())
 
 
+@icinga_cli_group.group(name="check-savecodenow")
+@click.option(
+    "--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
+)
+@click.option(
+    "--poll-interval",
+    type=int,
+    default=10,
+    help="Interval (in seconds) between two polls to the API, "
+    "to check for save code now status.",
+)
+@click.pass_context
+def check_scn(ctx, **kwargs):
+    """Checks for the save code now feature of an swh-web instance"""
+    ctx.obj.update(kwargs)
+
+
+@check_scn.command(name="origin")
+@click.argument("origin", type=str)
+@click.option("--visit-type", type=str, required=True, help="Visit type for origin")
+@click.pass_context
+def check_scn_origin(ctx, origin, visit_type):
+    """Requests a save code now via the api for a given origin with type visit_type, waits
+    for its completion, reports approximate time of completion (failed or succeeded) and
+    warns if threshold exceeded.
+
+    """
+    from .save_code_now import SaveCodeNowCheck
+
+    sys.exit(SaveCodeNowCheck(ctx.obj, origin, visit_type).main())
+
+
 @icinga_cli_group.group(name="check-deposit")
 @click.option(
     "--server",
diff --git a/swh/icinga_plugins/save_code_now.py b/swh/icinga_plugins/save_code_now.py
new file mode 100644
--- /dev/null
+++ b/swh/icinga_plugins/save_code_now.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import time
+from typing import Dict, List
+
+import requests
+
+from .base_check import BaseCheck
+
+REPORT_MSG = "Save code now request for origin"
+
+WAITING_STATUSES = ("not yet scheduled", "running", "scheduled")
+
+
+class SaveCodeNowCheck(BaseCheck):
+    """Check requesting a save code now for an origin and polling its completion"""
+
+    TYPE = "SAVECODENOW"
+    DEFAULT_WARNING_THRESHOLD = 60
+    DEFAULT_CRITICAL_THRESHOLD = 120
+
+    def __init__(self, obj: Dict, origin: str, visit_type: str) -> None:
+        super().__init__(obj)
+        self.api_url = obj["swh_web_url"].rstrip("/")
+        self.poll_interval = obj["poll_interval"]
+        self.origin = origin
+        self.visit_type = visit_type
+
+    @staticmethod
+    def api_url_scn(root_api_url: str, origin: str, visit_type: str) -> str:
+        """Compute the save code now api url for a given origin"""
+        return f"{root_api_url}/api/1/origin/save/{visit_type}/url/{origin}/"
+
+    def main(self) -> int:
+        """Scenario description:
+
+        1. Requests a save code now request via the api for origin self.origin with type
+        self.visit_type.
+
+        2. Polling regularly at self.poll_interval seconds the completion status.
+
+        3. When either succeeded, failed or threshold exceeded, report approximate time
+        of completion. This will warn if thresholds are exceeded.
+
+        Returns:
+            The exit code of the check: 2 when the request failed, ended in an
+            unsupported status or exceeded the critical threshold, otherwise the code
+            computed by self.get_status out of the total time.
+
+        """
+        start_time: float = time.time()
+        total_time: float = 0.0
+        scn_url = self.api_url_scn(self.api_url, self.origin, self.visit_type)
+        response = requests.post(scn_url)
+        assert response.status_code == 200, (response, response.text)
+
+        result: Dict = response.json()
+
+        status_key = "save_task_status"
+        request_date = result["save_request_date"]
+        origin_info = (self.visit_type, self.origin)
+
+        while result[status_key] in WAITING_STATUSES:
+            time.sleep(self.poll_interval)
+            response = requests.get(scn_url)
+            assert (
+                response.status_code == 200
+            ), f"Unexpected response: {response}, {response.text}"
+            raw_result: List[Dict] = response.json()
+            assert len(raw_result) > 0, f"Unexpected result: {raw_result}"
+
+            # the api can return multiple entries for the same origin, so retrieve
+            # only the one status result we are interested in
+            matching_results = [
+                r for r in raw_result if r["save_request_date"] == request_date
+            ]
+            assert matching_results, f"Unexpected result: {raw_result}"
+            result = matching_results[0]
+
+            total_time = time.time() - start_time
+
+            if total_time > self.critical_threshold:
+                self.print_result(
+                    "CRITICAL",
+                    f"{REPORT_MSG} {origin_info} took more than {total_time:.2f}s "
+                    f"and has status: {result[status_key]}.",
+                    total_time=total_time,
+                )
+                return 2
+
+        if result[status_key] == "succeeded":
+            (status_code, status) = self.get_status(total_time)
+            self.print_result(
+                status,
+                f"{REPORT_MSG} {origin_info} took {total_time:.2f}s and succeeded.",
+                total_time=total_time,
+            )
+            return status_code
+        elif result[status_key] == "failed":
+            self.print_result(
+                "CRITICAL",
+                f"{REPORT_MSG} {origin_info} took {total_time:.2f}s and failed.",
+                total_time=total_time,
+            )
+            return 2
+        else:
+            self.print_result(
+                "CRITICAL",
+                f"{REPORT_MSG} {origin_info} took {total_time:.2f}s "
+                "and resulted in unsupported status: "
+                f"{result['save_request_status']} ; {result[status_key]}.",
+                total_time=total_time,
+            )
+            return 2
diff --git a/swh/icinga_plugins/tests/test_deposit.py b/swh/icinga_plugins/tests/test_deposit.py
--- a/swh/icinga_plugins/tests/test_deposit.py
+++ b/swh/icinga_plugins/tests/test_deposit.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -9,10 +9,9 @@
 import time
 from typing import Optional
 
-from click.testing import CliRunner
 import pytest
 
-from swh.icinga_plugins.cli import icinga_cli_group
+from swh.icinga_plugins.tests.utils import invoke
 
 from .web_scenario import WebScenario
 
@@ -165,15 +164,6 @@
     return path
 
 
-def invoke(args, catch_exceptions=False):
-    runner = CliRunner()
-    result = runner.invoke(icinga_cli_group, args)
-    if not catch_exceptions and result.exception:
-        print(result.output)
-        raise result.exception
-    return result
-
-
 def test_deposit_immediate_success(
     requests_mock, mocker, sample_archive, sample_metadata, mocked_time
 ):
diff --git a/swh/icinga_plugins/tests/test_save_code_now.py b/swh/icinga_plugins/tests/test_save_code_now.py
new file mode 100644
--- /dev/null
+++ b/swh/icinga_plugins/tests/test_save_code_now.py
@@ -0,0 +1,222 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from datetime import datetime, timezone
+import random
+from typing import Dict, Optional, Tuple
+
+import pytest
+
+from swh.icinga_plugins.save_code_now import (
+    REPORT_MSG,
+    WAITING_STATUSES,
+    SaveCodeNowCheck,
+)
+
+from .utils import invoke
+from .web_scenario import WebScenario
+
+
+def fake_response(
+    origin: str,
+    visit_type: str,
+    sor_status: str = "pending",
+    task_status: Optional[str] = None,
+) -> Dict:
+    """Fake a save code now request api response"""
+    visit_date = None
+    if task_status in ("failed", "succeeded"):
+        visit_date = str(datetime.now(tz=timezone.utc))
+
+    return {
+        "visit_type": visit_type,
+        "origin_url": origin,
+        "save_request_date": "to-replace",
+        "save_request_status": sor_status,
+        "save_task_status": task_status,
+        "visit_date": visit_date,
+    }
+
+
+@pytest.fixture
+def origin_info() -> Tuple[str, str]:
+    """Build an origin info to request save code now
+
+    """
+    origin_name = random.choice(range(10))
+    return random.choice(["git", "svn", "hg"]), f"mock://fake-origin-url/{origin_name}"
+
+
+def test_save_code_now_success(requests_mock, mocker, mocked_time, origin_info):
+    """Successful ingestion scenario below threshold"""
+    scenario = WebScenario()
+    visit_type, origin = origin_info
+
+    root_api_url = "mock://swh-web.example.org"
+    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
+
+    # creation request
+    scenario.add_step(
+        "post",
+        api_url,
+        fake_response(origin, visit_type, "accepted", "not yet scheduled"),
+    )
+    response_scheduled = fake_response(origin, visit_type, "accepted", "scheduled")
+    # status polling requests
+    scenario.add_step("get", api_url, [response_scheduled])
+    # sometimes we can have multiple responses so we fake that here
+    scenario.add_step("get", api_url, [response_scheduled, response_scheduled])
+    scenario.add_step(
+        "get", api_url, [fake_response(origin, visit_type, "accepted", "succeeded")]
+    )
+    scenario.install_mock(requests_mock)
+
+    # fmt: off
+    result = invoke(
+        [
+            "check-savecodenow", "--swh-web-url", root_api_url,
+            "origin", origin,
+            "--visit-type", visit_type,
+        ]
+    )
+    # fmt: on
+
+    assert result.output == (
+        f"{SaveCodeNowCheck.TYPE} OK - {REPORT_MSG} {origin_info} took "
+        "30.00s and succeeded.\n"
+        "| 'total_time' = 30.00s\n"
+    )
+    assert result.exit_code == 0, f"Unexpected result: {result.output}"
+
+
+def test_save_code_now_failure(requests_mock, mocker, mocked_time, origin_info):
+    """Failed ingestion scenario should be reported"""
+    scenario = WebScenario()
+    visit_type, origin = origin_info
+
+    root_api_url = "mock://swh-web.example.org"
+    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
+
+    # creation request
+    scenario.add_step(
+        "post",
+        api_url,
+        fake_response(origin, visit_type, "accepted", "not yet scheduled"),
+    )
+    # status polling requests
+    scenario.add_step(
+        "get", api_url, [fake_response(origin, visit_type, "accepted", "scheduled")]
+    )
+    scenario.add_step(
+        "get", api_url, [fake_response(origin, visit_type, "accepted", "failed")]
+    )
+    scenario.install_mock(requests_mock)
+
+    # fmt: off
+    result = invoke(
+        [
+            "check-savecodenow", "--swh-web-url", root_api_url,
+            "origin", origin,
+            "--visit-type", visit_type,
+        ],
+        catch_exceptions=True,
+    )
+    # fmt: on
+
+    assert result.output == (
+        f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
+        "20.00s and failed.\n"
+        "| 'total_time' = 20.00s\n"
+    )
+    assert result.exit_code == 2, f"Unexpected result: {result.output}"
+
+
+def test_save_code_now_pending_state_unsupported(
+    requests_mock, mocker, mocked_time, origin_info
+):
+    """Pending save requests are not supported in the test so they should fail early
+
+    Pending requests are requests that need a moderator to accept the repository into
+    the save code now flow.
+
+    Do not actually use such origin to trigger the checks.
+
+    """
+    scenario = WebScenario()
+    visit_type, origin = origin_info
+    root_api_url = "mock://swh-web2.example.org"
+    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
+
+    # creation request
+    scenario.add_step(
+        "post", api_url, fake_response(origin, visit_type, "pending", "not created"),
+    )
+    scenario.install_mock(requests_mock)
+
+    # fmt: off
+    result = invoke(
+        [
+            "check-savecodenow", "--swh-web-url", root_api_url,
+            "origin", origin,
+            "--visit-type", visit_type,
+        ],
+        catch_exceptions=True,
+    )
+    # fmt: on
+
+    assert result.output == (
+        f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
+        "0.00s and resulted in unsupported status: pending ; not created.\n"
+        "| 'total_time' = 0.00s\n"
+    )
+    assert result.exit_code == 2, f"Unexpected output: {result.output}"
+
+
+def test_save_code_now_threshold_exceeded(
+    requests_mock, mocker, mocked_time, origin_info
+):
+    """Saving requests exceeding the critical threshold should be reported as critical
+
+    """
+    scenario = WebScenario()
+    visit_type, origin = origin_info
+
+    root_api_url = "mock://swh-web2.example.org"
+    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
+
+    # creation request
+    scenario.add_step(
+        "post",
+        api_url,
+        fake_response(origin, visit_type, "accepted", "not yet scheduled"),
+    )
+
+    # we'll make the response being in the awaiting status
+    # beyond 13, this will exceed the threshold
+    for _ in range(13):
+        waiting_status = random.choice(WAITING_STATUSES)
+        response_scheduled = fake_response(
+            origin, visit_type, "accepted", waiting_status
+        )
+        scenario.add_step("get", api_url, [response_scheduled])
+    scenario.install_mock(requests_mock)
+
+    # fmt: off
+    result = invoke(
+        [
+            "check-savecodenow", "--swh-web-url", root_api_url,
+            "origin", origin,
+            "--visit-type", visit_type,
+        ],
+        catch_exceptions=True,
+    )
+    # fmt: on
+
+    assert result.output == (
+        f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
+        f"more than 130.00s and has status: {waiting_status}.\n"
+        "| 'total_time' = 130.00s\n"
+    )
+    assert result.exit_code == 2, f"Unexpected output: {result.output}"
diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py
--- a/swh/icinga_plugins/tests/test_vault.py
+++ b/swh/icinga_plugins/tests/test_vault.py
@@ -1,13 +1,11 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import time
 
-from click.testing import CliRunner
-
-from swh.icinga_plugins.cli import icinga_cli_group
+from swh.icinga_plugins.tests.utils import invoke
 
 from .web_scenario import WebScenario
 
@@ -51,15 +49,6 @@
     return bytes.fromhex(dir_id)
 
 
-def invoke(args, catch_exceptions=False):
-    runner = CliRunner()
-    result = runner.invoke(icinga_cli_group, args)
-    if not catch_exceptions and result.exception:
-        print(result.output)
-        raise result.exception
-    return result
-
-
 def test_vault_immediate_success(requests_mock, mocker, mocked_time):
     scenario = WebScenario()
 
diff --git a/swh/icinga_plugins/tests/utils.py b/swh/icinga_plugins/tests/utils.py
new file mode 100644
--- /dev/null
+++ b/swh/icinga_plugins/tests/utils.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2019-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import List
+
+from click.testing import CliRunner, Result
+
+from swh.icinga_plugins.cli import icinga_cli_group
+
+
+def invoke(args: List[str], catch_exceptions: bool = False) -> Result:
+    """Invoke icinga plugin main cli command with args
+
+    Unless catch_exceptions is set, the command output is printed and the exception
+    recorded on the result (if any) is re-raised.
+
+    """
+    runner = CliRunner()
+    result = runner.invoke(icinga_cli_group, args)
+    if not catch_exceptions and result.exception:
+        print(result.output)
+        raise result.exception
+    return result