Changeset View
Changeset View
Standalone View
Standalone View
swh/icinga_plugins/deposit.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import sys | import sys | ||||
import time | import time | ||||
from typing import Any, Dict, Optional | from typing import Any, Dict, Optional | ||||
from swh.deposit.client import PublicApiDepositClient | from swh.deposit.client import PublicApiDepositClient | ||||
from .base_check import BaseCheck | from .base_check import BaseCheck | ||||
class DepositCheck(BaseCheck): | class DepositCheck(BaseCheck): | ||||
TYPE = "DEPOSIT" | TYPE = "DEPOSIT" | ||||
DEFAULT_WARNING_THRESHOLD = 120 | DEFAULT_WARNING_THRESHOLD = 120 | ||||
DEFAULT_CRITICAL_THRESHOLD = 3600 | DEFAULT_CRITICAL_THRESHOLD = 3600 | ||||
def __init__(self, obj): | def __init__(self, obj): | ||||
super().__init__(obj) | super().__init__(obj, application="deposit") | ||||
self._poll_interval = obj["poll_interval"] | self._poll_interval = obj["poll_interval"] | ||||
self._archive_path = obj["archive"] | self._archive_path = obj["archive"] | ||||
self._metadata_path = obj["metadata"] | self._metadata_path = obj["metadata"] | ||||
self._collection = obj["collection"] | self._collection = obj["collection"] | ||||
self._slug: Optional[str] = None | self._slug: Optional[str] = None | ||||
self._client = PublicApiDepositClient( | self._client = PublicApiDepositClient( | ||||
{ | { | ||||
"url": obj["server"], | "url": obj["server"], | ||||
"auth": {"username": obj["username"], "password": obj["password"]}, | "auth": {"username": obj["username"], "password": obj["password"]}, | ||||
} | } | ||||
) | ) | ||||
self.register_prometheus_gauge("duration", "seconds", ["step", "status"]) | |||||
self.register_prometheus_gauge("status", "") | |||||
def upload_deposit(self): | def upload_deposit(self): | ||||
slug = "check-deposit-%s" % datetime.datetime.now().isoformat() | slug = "check-deposit-%s" % datetime.datetime.now().isoformat() | ||||
result = self._client.deposit_create( | result = self._client.deposit_create( | ||||
archive=self._archive_path, | archive=self._archive_path, | ||||
metadata=self._metadata_path, | metadata=self._metadata_path, | ||||
collection=self._collection, | collection=self._collection, | ||||
in_progress=False, | in_progress=False, | ||||
slug=slug, | slug=slug, | ||||
Show All 31 Lines | def wait_while_status(self, statuses, start_time, metrics, result): | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f"Timed out while in status " | f"Timed out while in status " | ||||
f'{result["deposit_status"]} ' | f'{result["deposit_status"]} ' | ||||
f'({metrics["total_time"]}s seconds since deposit ' | f'({metrics["total_time"]}s seconds since deposit ' | ||||
f"started)", | f"started)", | ||||
**metrics, | **metrics, | ||||
) | ) | ||||
self.collect_prometheus_metric( | |||||
"duration", | |||||
metrics["total_time"], | |||||
[result["deposit_status"], "timeout"], | |||||
) | |||||
self.collect_prometheus_metric( | |||||
"duration", metrics["total_time"], ["", "timeout"] | |||||
) | |||||
self.collect_prometheus_metric("status", 2) | |||||
sys.exit(2) | sys.exit(2) | ||||
time.sleep(self._poll_interval) | time.sleep(self._poll_interval) | ||||
result = self.get_deposit_status() | result = self.get_deposit_status() | ||||
return result | return result | ||||
Show All 12 Lines | def main(self): | ||||
# Check validation succeeded | # Check validation succeeded | ||||
if result["deposit_status"] == "rejected": | if result["deposit_status"] == "rejected": | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f'Deposit was rejected: {result["deposit_status_detail"]}', | f'Deposit was rejected: {result["deposit_status_detail"]}', | ||||
**metrics, | **metrics, | ||||
) | ) | ||||
self.collect_prometheus_metric( | |||||
"duration", metrics["validation_time"], ["validation", "rejected"] | |||||
) | |||||
self.collect_prometheus_metric( | |||||
"duration", metrics["total_time"], ["validation", "rejected"] | |||||
) | |||||
self.collect_prometheus_metric("status", 2) | |||||
return 2 | return 2 | ||||
self.collect_prometheus_metric( | |||||
"duration", metrics["validation_time"], ["validation", "ok"] | |||||
) | |||||
# Wait for loading | # Wait for loading | ||||
result = self.wait_while_status( | result = self.wait_while_status( | ||||
["verified", "loading"], start_time, metrics, result | ["verified", "loading"], start_time, metrics, result | ||||
) | ) | ||||
metrics["total_time"] = time.time() - start_time | metrics["total_time"] = time.time() - start_time | ||||
metrics["load_time"] = ( | metrics["load_time"] = ( | ||||
metrics["total_time"] - metrics["upload_time"] - metrics["validation_time"] | metrics["total_time"] - metrics["upload_time"] - metrics["validation_time"] | ||||
) | ) | ||||
self.collect_prometheus_metric( | |||||
"duration", metrics["load_time"], ["loading", result["deposit_status"]] | |||||
) | |||||
# Check loading succeeded | # Check loading succeeded | ||||
if result["deposit_status"] == "failed": | if result["deposit_status"] == "failed": | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f'Deposit loading failed: {result["deposit_status_detail"]}', | f'Deposit loading failed: {result["deposit_status_detail"]}', | ||||
**metrics, | **metrics, | ||||
) | ) | ||||
self.collect_prometheus_metric( | |||||
"duration", metrics["total_time"], ["total", "failed"] | |||||
) | |||||
self.collect_prometheus_metric("status", 2) | |||||
return 2 | return 2 | ||||
# Check for unexpected status | # Check for unexpected status | ||||
if result["deposit_status"] != "done": | if result["deposit_status"] != "done": | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f'Deposit got unexpected status: {result["deposit_status"]} ' | f'Deposit got unexpected status: {result["deposit_status"]} ' | ||||
f'({result["deposit_status_detail"]})', | f'({result["deposit_status_detail"]})', | ||||
**metrics, | **metrics, | ||||
) | ) | ||||
self.collect_prometheus_metric( | |||||
"duration", metrics["total_time"], ["total", result["deposit_status"]] | |||||
) | |||||
self.collect_prometheus_metric("status", 2) | |||||
return 2 | return 2 | ||||
# Everything went fine, check total time wasn't too large and | # Everything went fine, check total time wasn't too large and | ||||
# print result | # print result | ||||
(status_code, status) = self.get_status(metrics["total_time"]) | (status_code, status) = self.get_status(metrics["total_time"]) | ||||
self.print_result( | self.print_result( | ||||
status, | status, | ||||
f'Deposit took {metrics["total_time"]:.2f}s and succeeded.', | f'Deposit took {metrics["total_time"]:.2f}s and succeeded.', | ||||
**metrics, | **metrics, | ||||
) | ) | ||||
if status_code != 0: # Stop if any problem in the initial scenario | if status_code != 0: # Stop if any problem in the initial scenario | ||||
ardumont: ? | |||||
vsellier (Done): true, good catch | |||||
self.collect_prometheus_metric("status", 2) | |||||
return status_code | return status_code | ||||
# Initial deposit is now completed, now we can update the deposit with metadata | # Initial deposit is now completed, now we can update the deposit with metadata | ||||
result = self.update_deposit_with_metadata() | result = self.update_deposit_with_metadata() | ||||
total_time = time.time() - start_time | total_time = time.time() - start_time | ||||
metrics_update = { | metrics_update = { | ||||
"total_time": total_time, | "total_time": total_time, | ||||
"update_time": ( | "update_time": ( | ||||
total_time | total_time | ||||
- metrics["upload_time"] | - metrics["upload_time"] | ||||
- metrics["validation_time"] | - metrics["validation_time"] | ||||
- metrics["load_time"] | - metrics["load_time"] | ||||
), | ), | ||||
} | } | ||||
if "error" in result: | if "error" in result: | ||||
self.print_result( | self.print_result( | ||||
"CRITICAL", | "CRITICAL", | ||||
f'Deposit Metadata update failed: {result["error"]} ', | f'Deposit Metadata update failed: {result["error"]} ', | ||||
**metrics_update, | **metrics_update, | ||||
) | ) | ||||
self.collect_prometheus_metric( | |||||
"duration", metrics["total_time"], ["total", "metadata_error"] | |||||
) | |||||
self.collect_prometheus_metric("status", 2) | |||||
return 2 | return 2 | ||||
(status_code, status) = self.get_status(metrics_update["total_time"]) | (status_code, status) = self.get_status(metrics_update["total_time"]) | ||||
self.print_result( | self.print_result( | ||||
status, | status, | ||||
f'Deposit Metadata update took {metrics_update["update_time"]:.2f}s ' | f'Deposit Metadata update took {metrics_update["update_time"]:.2f}s ' | ||||
"and succeeded.", | "and succeeded.", | ||||
**metrics_update, | **metrics_update, | ||||
) | ) | ||||
self.collect_prometheus_metric( | |||||
"duration", metrics["total_time"], ["total", "done"] | |||||
) | |||||
self.collect_prometheus_metric("status", status_code) | |||||
return status_code | return status_code |
?