Page MenuHomeSoftware Heritage

D2454.diff
No OneTemporary

D2454.diff

diff --git a/swh/icinga_plugins/base_check.py b/swh/icinga_plugins/base_check.py
new file mode 100644
--- /dev/null
+++ b/swh/icinga_plugins/base_check.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+class BaseCheck:
+    """Common threshold handling shared by Icinga checks.
+
+    Subclasses must define ``DEFAULT_WARNING_THRESHOLD`` and
+    ``DEFAULT_CRITICAL_THRESHOLD``; they are used whenever the
+    corresponding threshold is not supplied through ``obj``.
+    """
+
+    def __init__(self, obj):
+        """Read thresholds from ``obj``, the dict filled in by the CLI.
+
+        The CLI stores its ``-w``/``-c`` options under the keys
+        ``warning_threshold`` and ``critical_threshold``, with ``None``
+        as the value when the option was not given on the command line.
+        """
+        warning = obj.get('warning_threshold')
+        critical = obj.get('critical_threshold')
+        # An option left unset is None, not missing, so a dict.get()
+        # default would never kick in; fall back explicitly instead.
+        self.warning_threshold = (
+            self.DEFAULT_WARNING_THRESHOLD if warning is None else warning)
+        self.critical_threshold = (
+            self.DEFAULT_CRITICAL_THRESHOLD if critical is None else critical)
+
+    def get_status(self, value):
+        """Map ``value`` to an Icinga ``(exit_code, label)`` pair.
+
+        A threshold that is falsy (``0`` or ``None``) is treated as
+        disabled. The critical threshold is checked first.
+        """
+        if self.critical_threshold and value >= self.critical_threshold:
+            return (2, 'CRITICAL')
+        elif self.warning_threshold and value >= self.warning_threshold:
+            return (1, 'WARNING')
+        else:
+            return (0, 'OK')
diff --git a/swh/icinga_plugins/cli.py b/swh/icinga_plugins/cli.py
--- a/swh/icinga_plugins/cli.py
+++ b/swh/icinga_plugins/cli.py
@@ -17,13 +17,19 @@
help='URL to an swh-storage HTTP API')
@click.option('--swh-web-url', type=str,
help='URL to an swh-web instance')
+@click.option('-w', '--warning', type=int,
+ help='Warning threshold.')
+@click.option('-c', '--critical', type=int,
+ help='Critical threshold.')
@click.pass_context
-def cli(ctx, swh_storage_url, swh_web_url):
+def cli(ctx, swh_storage_url, swh_web_url, warning, critical):
"""Main command for Icinga plugins
"""
ctx.ensure_object(dict)
ctx.obj['swh_storage_url'] = swh_storage_url
ctx.obj['swh_web_url'] = swh_web_url
+ ctx.obj['warning_threshold'] = warning
+ ctx.obj['critical_threshold'] = critical
@cli.group(name='check-vault')
diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py
--- a/swh/icinga_plugins/tests/test_vault.py
+++ b/swh/icinga_plugins/tests/test_vault.py
@@ -6,6 +6,7 @@
import enum
import json
import re
+import time
from click.testing import CliRunner
@@ -107,6 +108,7 @@
r'[0-9]\.[0-9]{2}s and succeeded.\n'
r"| 'total time' = [0-9]\.[0-9]{2}s",
result.output)
+ assert result.exit_code == 0, result.output
sleep_mock.assert_called_once_with(10)
@@ -168,6 +170,7 @@
r'[0-9]\.[0-9]{2}s and succeeded.\n'
r"| 'total time' = [0-9]\.[0-9]{2}s",
result.output)
+ assert result.exit_code == 0, result.output
assert sleep_mock.call_count == 2
@@ -225,5 +228,74 @@
r'[0-9]\.[0-9]{2}s and failed with: foobar\n'
r"| 'total time' = [0-9]\.[0-9]{2}s",
result.output)
+ assert result.exit_code == 2, result.output
sleep_mock.assert_called_once_with(10)
+
+
+def test_vault_timeout(requests_mock, mocker):
+    """A cooking still pending past the critical threshold is CRITICAL."""
+    class Step(enum.Enum):
+        NOTHING_DONE = 0
+        CHECKED_UNCOOKED = 1
+        REQUESTED_COOKING = 2
+        PENDING = 3
+
+    step = Step.NOTHING_DONE
+
+    def post_callback(request, context):
+        nonlocal step
+        if step == Step.CHECKED_UNCOOKED:
+            step = Step.REQUESTED_COOKING
+            return json.dumps(response_pending)
+        else:
+            assert False, step
+
+    def get_callback(request, context):
+        context.json = True
+        nonlocal step, time_offset
+        if step == Step.NOTHING_DONE:
+            context.status_code = 404
+            step = Step.CHECKED_UNCOOKED
+        elif step == Step.CHECKED_UNCOOKED:
+            assert False
+        elif step == Step.REQUESTED_COOKING:
+            step = Step.PENDING
+            return json.dumps(response_pending)
+        elif step == Step.PENDING:
+            time_offset += 4000  # jump forward in time more than 1h
+            return json.dumps(response_pending)
+        else:
+            assert False, step
+
+    requests_mock.get(
+        f'mock://swh-web.example.org/api/1/vault/directory/{dir_id}/',
+        text=get_callback)
+    requests_mock.post(
+        f'mock://swh-web.example.org/api/1/vault/directory/{dir_id}/',
+        text=post_callback)
+
+    get_storage_mock = mocker.patch('swh.icinga_plugins.vault.get_storage')
+    get_storage_mock.side_effect = FakeStorage
+
+    sleep_mock = mocker.patch('time.sleep')
+
+    real_time = time.time  # keep the real clock before patching it
+    time_offset = 0
+    mocker.patch(
+        'time.time', side_effect=lambda: real_time() + time_offset)
+
+    result = invoke([
+        '--swh-web-url', 'mock://swh-web.example.org',
+        '--swh-storage-url', 'foo://example.org',
+        'check-vault', 'directory',
+    ], catch_exceptions=True)
+
+    assert re.match(
+        rf'VAULT CRITICAL - cooking directory {dir_id} took more than '
+        r'[0-9]+\.[0-9]{2}s and has status: foo\n'
+        r"\| 'total time' = [0-9]+\.[0-9]{2}s",  # literal pipe, not alternation
+        result.output)
+    assert result.exit_code == 2, result.output
+
+    assert sleep_mock.call_count == 2
diff --git a/swh/icinga_plugins/vault.py b/swh/icinga_plugins/vault.py
--- a/swh/icinga_plugins/vault.py
+++ b/swh/icinga_plugins/vault.py
@@ -9,13 +9,19 @@
from swh.storage import get_storage
+from .base_check import BaseCheck
+
class NoDirectory(Exception):
pass
-class VaultCheck:
+class VaultCheck(BaseCheck):
+ DEFAULT_WARNING_THRESHOLD = 0
+ DEFAULT_CRITICAL_THRESHOLD = 3600
+
def __init__(self, obj):
+ super().__init__(obj)
self._swh_storage = get_storage('remote', url=obj['swh_storage_url'])
self._swh_web_url = obj['swh_web_url']
self._poll_interval = obj['poll_interval']
@@ -44,6 +50,7 @@
return 2
start_time = time.time()
+ total_time = 0
response = requests.post(self._url_for_dir(dir_id))
assert response.status_code == 200, (response, response.text)
result = response.json()
@@ -53,23 +60,30 @@
assert response.status_code == 200, (response, response.text)
result = response.json()
- end_time = time.time()
- total_time = end_time - start_time
+ total_time = time.time() - start_time
+
+ if total_time > self.critical_threshold:
+ print(f'VAULT CRITICAL - cooking directory {dir_id.hex()} '
+ f'took more than {total_time:.2f}s and has status: '
+ f'{result["progress_message"]}')
+ print(f"| 'total time' = {total_time:.2f}s")
+ return 2
if result['status'] == 'done':
- print(f'VAULT OK - cooking directory {dir_id.hex()} '
+ (status_code, status) = self.get_status(total_time)
+ print(f'VAULT {status} - cooking directory {dir_id.hex()} '
f'took {total_time:.2f}s and succeeded.')
print(f"| 'total time' = {total_time:.2f}s")
- return 0
+ return status_code
elif result['status'] == 'failed':
print(f'VAULT CRITICAL - cooking directory {dir_id.hex()} '
f'took {total_time:.2f}s and failed with: '
f'{result["progress_message"]}')
print(f"| 'total time' = {total_time:.2f}s")
- return 3
+ return 2
else:
print(f'VAULT CRITICAL - cooking directory {dir_id.hex()} '
f'took {total_time:.2f}s and resulted in unknown: '
f'status: {result["status"]}')
print(f"| 'total time' = {total_time:.2f}s")
- return 3
+ return 2

File Metadata

Mime Type
text/plain
Expires
Fri, Jun 20, 5:52 PM (2 w, 13 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216051

Event Timeline