diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py --- a/swh/icinga_plugins/tests/test_vault.py +++ b/swh/icinga_plugins/tests/test_vault.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import io +import tarfile import time from swh.icinga_plugins.tests.utils import invoke @@ -13,6 +15,15 @@ DIR_ID = "ab" * 20 +def _make_tarfile(): + fd = io.BytesIO() + with tarfile.open(fileobj=fd, mode="w:gz") as tf: + tf.addfile(tarfile.TarInfo(f"swh:1:dir:{DIR_ID}/README"), b"this is a readme\n") + return fd.getvalue() + + +TARBALL = _make_tarfile() + response_pending = { "obj_id": DIR_ID, "obj_type": "directory", @@ -62,7 +73,7 @@ scenario.add_step( "get", f"{ROOT_URL}/api/1/vault/directory/{DIR_ID}/raw/", - "this-is-a-tarball", + TARBALL, headers={"Content-Type": "application/gzip", "Content-Length": "100000"}, ) @@ -102,7 +113,7 @@ scenario.add_step( "get", f"{ROOT_URL}/api/1/vault/directory/{DIR_ID}/raw/", - "this-is-a-tarball", + TARBALL, headers={"Content-Type": "application/gzip", "Content-Length": "100000"}, ) @@ -247,7 +258,7 @@ scenario.add_step( "get", f"{ROOT_URL}/api/1/vault/directory/{DIR_ID}/raw/", - "this-is-a-tarball", + TARBALL, headers={"Content-Type": "application/gzip", "Content-Length": "100000"}, ) @@ -298,3 +309,43 @@ assert result.output == ("VAULT CRITICAL - No directory exists in the archive.\n") assert result.exit_code == 2, result.output + + +def test_vault_corrupt_tarball(requests_mock, mocker, mocked_time): + scenario = WebScenario() + + url = f"{ROOT_URL}/api/1/vault/directory/{DIR_ID}/" + + scenario.add_step("get", url, {}, status_code=404) + scenario.add_step("post", url, response_pending) + scenario.add_step("get", url, response_pending) + scenario.add_step("get", url, response_done) + scenario.add_step( + "get", + f"{ROOT_URL}/api/1/vault/directory/{DIR_ID}/raw/", + b"this-is-not-a-tarball", + headers={"Content-Type": "application/gzip", "Content-Length": "100000"}, + ) + + scenario.install_mock(requests_mock) + + get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") + get_storage_mock.side_effect = FakeStorage + + result = invoke( + [ + "check-vault", + "--swh-web-url", + "mock://swh-web.example.org", + "--swh-storage-url", + "foo://example.org", + "directory", + ], + catch_exceptions=True, + ) + + assert result.output == ( + "VAULT CRITICAL - Error while reading tarball: not a gzip file\n" + "| 'total_time' = 20.00s\n" + ) + assert result.exit_code == 2, result.output diff --git a/swh/icinga_plugins/tests/web_scenario.py b/swh/icinga_plugins/tests/web_scenario.py --- a/swh/icinga_plugins/tests/web_scenario.py +++ b/swh/icinga_plugins/tests/web_scenario.py @@ -11,7 +11,7 @@ import dataclasses import json -from typing import Callable, Dict, List, Optional, Set +from typing import Callable, Dict, List, Optional, Set, Union import requests_mock @@ -20,7 +20,7 @@ class Step: expected_method: str expected_url: str - response: object + response: Union[str, bytes, Dict, List] status_code: int = 200 headers: Dict[str, str] = dataclasses.field(default_factory=dict) callback: Optional[Callable[[], int]] = None @@ -69,9 +69,7 @@ """ for endpoint in self._endpoints: mocker.register_uri( - endpoint.method.upper(), - endpoint.url, - text=self._request_callback, # type: ignore # stubs are too strict + endpoint.method.upper(), endpoint.url, content=self._request_callback, ) def _request_callback(self, request, context): @@ -89,6 +87,8 @@ step.callback() if isinstance(step.response, str): + return step.response.encode() + elif isinstance(step.response, bytes): return step.response else: - return json.dumps(step.response) + return json.dumps(step.response).encode() diff --git a/swh/icinga_plugins/vault.py b/swh/icinga_plugins/vault.py --- a/swh/icinga_plugins/vault.py +++ b/swh/icinga_plugins/vault.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import tarfile import time import requests @@ -90,7 +91,8 @@ ) return 2 - response = requests.get(result["fetch_url"]) + # need to stream the response, as it may be large + response = requests.get(result["fetch_url"], stream=True) if response.status_code != 200: self.print_result( "CRITICAL", @@ -106,12 +108,24 @@ total_time=total_time, ) return 2 - content_length = response.headers["Content-Length"] - if int(content_length) < 10_000: # 10KB + + try: + with tarfile.open(fileobj=response.raw, mode="r:gz") as tf: + # Note that we are streaming the tarfile from the network, + # so we are allowed at most one pass on the tf object; + # and the sooner we close it the better. + # Fortunately, checking only the first member is good enough: + tarinfo = tf.next() + if not tarinfo.name.startswith(f"swh:1:dir:{dir_id.hex()}/"): + self.print_result( + "CRITICAL", + f"Unexpected member in tarball: {tarinfo.name}", + total_time=total_time, + ) + return 2 + except tarfile.ReadError as e: self.print_result( - "CRITICAL", - f"Content-Length too small when downloading bundle: {content_type}", - total_time=total_time, + "CRITICAL", f"Error while reading tarball: {e}", total_time=total_time, ) return 2