
diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py
index 7398590..04cece2 100644
--- a/swh/icinga_plugins/tests/test_vault.py
+++ b/swh/icinga_plugins/tests/test_vault.py
@@ -1,427 +1,489 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import io
+import tarfile
import time
from swh.icinga_plugins.tests.utils import invoke
from .web_scenario import WebScenario
DIR_ID = "ab" * 20
url_api = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/"
url_fetch = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/raw/"
+
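+# Helper below builds a minimal, well-formed bundle for the mocked fetch: a
+# .tar.gz whose members live under a top-level directory named after the
+# cooked directory's SWHID, which is the layout the vault check verifies.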
+def _make_tarfile():
+    fd = io.BytesIO()
+    with tarfile.open(fileobj=fd, mode="w:gz") as tf:
+        content = b"this is a readme\n"
+        tarinfo = tarfile.TarInfo(f"swh:1:dir:{DIR_ID}/README")
+        tarinfo.size = len(content)
+        tf.addfile(tarinfo, io.BytesIO(content))
+
+        tarinfo = tarfile.TarInfo(f"swh:1:dir:{DIR_ID}")
+        tarinfo.type = tarfile.DIRTYPE
+        tf.addfile(tarinfo)
+    return fd.getvalue()
+
+
+TARBALL = _make_tarfile()
+
response_pending = {
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "progress_message": "foo",
    "status": "pending",
}

response_done = {
    "fetch_url": url_fetch,
    "id": 9,
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "status": "done",
}

response_done_no_fetch = {
    "id": 9,
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "status": "done",
}

response_failed = {
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "progress_message": "foobar",
    "status": "failed",
}

response_unknown_status = {
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "progress_message": "what",
    "status": "boo",
}

class FakeStorage:
    def __init__(self, foo, **kwargs):
        pass

    def directory_get_random(self):
        return bytes.fromhex(DIR_ID)

def test_vault_immediate_success(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
-        "get", url_fetch, "xx" * 40, headers={"Content-Type": "application/gzip"}
+        "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ]
    )
    assert result.output == (
        f"VAULT OK - cooking directory {DIR_ID} took "
        f"10.00s and succeeded.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 0, result.output

def test_vault_delayed_success(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
-        "get", url_fetch, "xx" * 40, headers={"Content-Type": "application/gzip"}
+        "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ]
    )
    assert result.output == (
        f"VAULT OK - cooking directory {DIR_ID} took "
        f"20.00s and succeeded.\n"
        f"| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 0, result.output

def test_vault_failure(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_failed)
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took "
        f"10.00s and failed with: foobar\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output

def test_vault_unknown_status(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_unknown_status)
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took "
        f"10.00s and resulted in unknown status: boo\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output

def test_vault_timeout(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_pending)
    scenario.add_step(
        "get", url_api, response_pending, callback=lambda: time.sleep(4000)
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took more than "
        f"4020.00s and has status: foo\n"
        f"| 'total_time' = 4020.00s\n"
    )
    assert result.exit_code == 2, result.output

def test_vault_cached_directory(requests_mock, mocker, mocked_time):
    """First serves a directory that's already in the cache, to
    test that vault_check requests another one."""
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=200)
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
-        "get", url_fetch, "xx" * 40, headers={"Content-Type": "application/gzip"}
+        "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ]
    )
    assert result.output == (
        f"VAULT OK - cooking directory {DIR_ID} took "
        f"10.00s and succeeded.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 0, result.output

def test_vault_no_directory(requests_mock, mocker, mocked_time):
    """Tests with an empty storage"""
    scenario = WebScenario()
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    mocker.patch(f"{__name__}.FakeStorage.directory_get_random", return_value=None)
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == ("VAULT CRITICAL - No directory exists in the archive.\n")
    assert result.exit_code == 2, result.output

def test_vault_fetch_failed(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
        "get",
        url_fetch,
        "",
        status_code=500,
        headers={"Content-Type": "application/gzip"},
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took "
        f"10.00s and succeeded, but fetch failed with status code 500.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output

-def test_vault_fetch_empty(requests_mock, mocker, mocked_time):
+def test_vault_fetch_missing_content_type(requests_mock, mocker, mocked_time):
+    scenario = WebScenario()
+
+    scenario.add_step("get", url_api, {}, status_code=404)
+    scenario.add_step("post", url_api, response_pending)
+    scenario.add_step("get", url_api, response_done)
+    scenario.add_step("get", url_fetch, "")
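+    # The fetch step above deliberately sets no Content-Type header, so the
+    # check is expected to report it as None.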
+
+    scenario.install_mock(requests_mock)
+
+    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
+    get_storage_mock.side_effect = FakeStorage
+
+    result = invoke(
+        [
+            "check-vault",
+            "--swh-web-url",
+            "mock://swh-web.example.org",
+            "--swh-storage-url",
+            "foo://example.org",
+            "directory",
+        ],
+        catch_exceptions=True,
+    )
+
+    assert result.output == (
+        "VAULT CRITICAL - Unexpected Content-Type when downloading bundle: None\n"
+        "| 'total_time' = 10.00s\n"
+    )
+    assert result.exit_code == 2, result.output
+
+
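+# Serve a payload that claims to be gzip but is not a valid archive; reading it
+# should fail with tarfile.ReadError ("not a gzip file") and be reported CRITICAL.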
+def test_vault_corrupt_tarball_gzip(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
+    scenario.add_step("get", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
-        "get", url_fetch, "", headers={"Content-Type": "application/gzip"}
+        "get",
+        url_fetch,
+        b"this-is-not-a-tarball",
+        headers={"Content-Type": "application/gzip", "Content-Length": "100000"},
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
-        f"VAULT CRITICAL - cooking directory {DIR_ID} took "
-        f"10.00s and succeeded, but fetch was empty.\n"
-        f"| 'total_time' = 10.00s\n"
+        "VAULT CRITICAL - Error while reading tarball: not a gzip file\n"
+        "| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 2, result.output

-def test_vault_fetch_missing_content_type(requests_mock, mocker, mocked_time):
+def test_vault_corrupt_tarball_member(requests_mock, mocker, mocked_time):
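+    # Hand-build a syntactically valid tarball whose only member is not under
+    # the expected swh:1:dir:<id>/ prefix, to exercise the "Unexpected member"
+    # branch of the check.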
+    fd = io.BytesIO()
+    with tarfile.open(fileobj=fd, mode="w:gz") as tf:
+        content = b"this is a readme\n"
+        tarinfo = tarfile.TarInfo("wrong_dir_name/README")
+        tarinfo.size = len(content)
+        tf.addfile(tarinfo, io.BytesIO(content))
+    tarball = fd.getvalue()
+
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
+    scenario.add_step("get", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
-    scenario.add_step("get", url_fetch, "")
+    scenario.add_step(
+        "get",
+        url_fetch,
+        tarball,
+        headers={"Content-Type": "application/gzip", "Content-Length": "100000"},
+    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
-        "VAULT CRITICAL - Unexpected Content-Type when downloading bundle: None\n"
-        "| 'total_time' = 10.00s\n"
+        "VAULT CRITICAL - Unexpected member in tarball: wrong_dir_name/README\n"
+        "| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 2, result.output

def test_vault_no_fetch_url(requests_mock, mocker, mocked_time):
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_done_no_fetch)
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took 10.00s and succeeded, "
        f"but API response did not contain a fetch_url.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output

diff --git a/swh/icinga_plugins/vault.py b/swh/icinga_plugins/vault.py
index fb1c1ad..3db33cc 100644
--- a/swh/icinga_plugins/vault.py
+++ b/swh/icinga_plugins/vault.py
@@ -1,145 +1,158 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import tarfile
import time
import requests
from swh.storage import get_storage
from .base_check import BaseCheck
class NoDirectory(Exception):
    pass

class VaultCheck(BaseCheck):
    TYPE = "VAULT"
    DEFAULT_WARNING_THRESHOLD = 0
    DEFAULT_CRITICAL_THRESHOLD = 3600

    def __init__(self, obj):
        super().__init__(obj)
        self._swh_storage = get_storage("remote", url=obj["swh_storage_url"])
        self._swh_web_url = obj["swh_web_url"]
        self._poll_interval = obj["poll_interval"]

    def _url_for_dir(self, dir_id):
        return self._swh_web_url + f"/api/1/vault/directory/{dir_id.hex()}/"

    def _pick_directory(self):
        dir_ = self._swh_storage.directory_get_random()
        if dir_ is None:
            raise NoDirectory()
        return dir_

    def _pick_uncached_directory(self):
        while True:
            dir_id = self._pick_directory()
            response = requests.get(self._url_for_dir(dir_id))
            if response.status_code == 404:
                return dir_id

    def main(self):
        try:
            dir_id = self._pick_uncached_directory()
        except NoDirectory:
            self.print_result("CRITICAL", "No directory exists in the archive.")
            return 2
        start_time = time.time()
        total_time = 0
        response = requests.post(self._url_for_dir(dir_id))
        assert response.status_code == 200, (response, response.text)
        result = response.json()
        while result["status"] in ("new", "pending"):
            time.sleep(self._poll_interval)
            response = requests.get(self._url_for_dir(dir_id))
            assert response.status_code == 200, (response, response.text)
            result = response.json()
            total_time = time.time() - start_time
            if total_time > self.critical_threshold:
                self.print_result(
                    "CRITICAL",
                    f"cooking directory {dir_id.hex()} took more than "
                    f"{total_time:.2f}s and has status: "
                    f'{result["progress_message"]}',
                    total_time=total_time,
                )
                return 2
        if result["status"] == "failed":
            self.print_result(
                "CRITICAL",
                f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                f'and failed with: {result["progress_message"]}',
                total_time=total_time,
            )
            return 2
        elif result["status"] != "done":
            self.print_result(
                "CRITICAL",
                f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                f'and resulted in unknown status: {result["status"]}',
                total_time=total_time,
            )
            return 2
        (status_code, status) = self.get_status(total_time)
        if "fetch_url" not in result:
            self.print_result(
                "CRITICAL",
                f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                f"and succeeded, but API response did not contain a fetch_url.",
                total_time=total_time,
            )
            return 2
        with requests.get(result["fetch_url"], stream=True) as fetch_response:
            try:
                fetch_response.raise_for_status()
            except requests.HTTPError:
                self.print_result(
                    "CRITICAL",
                    f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                    f"and succeeded, but fetch failed with status code "
                    f"{fetch_response.status_code}.",
                    total_time=total_time,
                )
                return 2
            content_type = fetch_response.headers.get("Content-Type")
            if content_type != "application/gzip":
                self.print_result(
                    "CRITICAL",
                    f"Unexpected Content-Type when downloading bundle: {content_type}",
                    total_time=total_time,
                )
                return 2
-            response_length = 0
-            for chunk in fetch_response.iter_content(decode_unicode=False):
-                response_length += len(chunk)
-
-            if response_length == 0:
-                self.print_result(
-                    "CRITICAL",
-                    f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
-                    f"and succeeded, but fetch was empty.",
-                    total_time=total_time,
-                )
-                return 2
+            try:
+                with tarfile.open(fileobj=fetch_response.raw, mode="r:gz") as tf:
+                    # Note that we are streaming the tarfile from the network,
+                    # so we are allowed at most one pass on the tf object;
+                    # and the sooner we close it the better.
+                    # Fortunately, checking only the first member is good enough:
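+                    # (a cooked directory bundle is expected to keep all of its
+                    # members under that single top-level directory)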
+                    tarinfo = tf.next()
+                    swhid = f"swh:1:dir:{dir_id.hex()}"
+                    if tarinfo.name != swhid and not tarinfo.name.startswith(
+                        f"{swhid}/"
+                    ):
+                        self.print_result(
+                            "CRITICAL",
+                            f"Unexpected member in tarball: {tarinfo.name}",
+                            total_time=total_time,
+                        )
+                        return 2
+            except tarfile.ReadError as e:
+                self.print_result(
+                    "CRITICAL",
+                    f"Error while reading tarball: {e}",
+                    total_time=total_time,
+                )
+                return 2
        self.print_result(
            status,
            f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
            f"and succeeded.",
            total_time=total_time,
        )
        return status_code
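
For reference, a minimal standalone sketch (illustrative only, not part of the patch) of the first-member check added above, exercised against a well-formed bundle, a bundle with a wrong top-level directory, and a non-gzip payload:

import io
import tarfile

SWHID = "swh:1:dir:" + "ab" * 20


def make_bundle(top_level):
    # Build a tiny .tar.gz whose single member lives under `top_level`.
    fd = io.BytesIO()
    with tarfile.open(fileobj=fd, mode="w:gz") as tf:
        content = b"this is a readme\n"
        info = tarfile.TarInfo(f"{top_level}/README")
        info.size = len(content)
        tf.addfile(info, io.BytesIO(content))
    return fd.getvalue()


def bundle_ok(bundle):
    # Mirrors vault.py: inspect only the first member; a ReadError means the
    # payload is not a readable gzip-compressed tarball.
    try:
        with tarfile.open(fileobj=io.BytesIO(bundle), mode="r:gz") as tf:
            name = tf.next().name
            return name == SWHID or name.startswith(f"{SWHID}/")
    except tarfile.ReadError:
        return False


print(bundle_ok(make_bundle(SWHID)))             # True
print(bundle_ok(make_bundle("wrong_dir_name")))  # False
print(bundle_ok(b"this-is-not-a-tarball"))       # False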
