Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9311989
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
92 KB
Subscribers
None
View Options
diff --git a/mypy.ini b/mypy.ini
index 2b77ba2..f8f07e3 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,18 +1,22 @@
[mypy]
namespace_packages = True
warn_unused_ignores = True
# 3rd party libraries without stubs (yet)
[mypy-pkg_resources.*]
ignore_missing_imports = True
[mypy-pytest.*]
ignore_missing_imports = True
[mypy-requests_mock.*]
ignore_missing_imports = True
+[mypy-prometheus_client.*]
+ignore_missing_imports = True
+
+
# [mypy-add_your_lib_here.*]
# ignore_missing_imports = True
diff --git a/requirements.txt b/requirements.txt
index ced35f8..516e676 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
click
psycopg2
requests
+prometheus-client
+# NOTE(review): do not add the PyPI 'typing' backport here -- 'typing' is in the standard library since Python 3.5 and the backport breaks installs on modern interpreters
diff --git a/swh/icinga_plugins/base_check.py b/swh/icinga_plugins/base_check.py
index 7110a9e..08be439 100644
--- a/swh/icinga_plugins/base_check.py
+++ b/swh/icinga_plugins/base_check.py
@@ -1,32 +1,119 @@
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import atexit
+from typing import Any, Dict, List
-from typing import Dict
+from prometheus_client import CollectorRegistry, Gauge, Summary, write_to_textfile
class BaseCheck:
DEFAULT_WARNING_THRESHOLD = 60
DEFAULT_CRITICAL_THRESHOLD = 120
+ PROMETHEUS_METRICS_BASENAME = "swh_e2e_"
- def __init__(self, obj: Dict[str, str]) -> None:
+ def __init__(self, obj: Dict[str, str], application: str):
self.warning_threshold = float(
obj.get("warning_threshold", self.DEFAULT_WARNING_THRESHOLD)
)
self.critical_threshold = float(
obj.get("critical_threshold", self.DEFAULT_CRITICAL_THRESHOLD)
)
+ self.prometheus_enabled = obj.get("prometheus_enabled")
+ self.prometheus_exporter_directory = obj.get("prometheus_exporter_directory")
+ self.environment = obj.get("environment")
+ self.application = application
+
+ # A new registry is created to not export the default process metrics
+ self.registry = CollectorRegistry()
+
+ self.prometheus_metrics: Dict[str, Any] = {}
+
+ atexit.register(self.save_prometheus_metrics)
def get_status(self, value):
if self.critical_threshold and value >= self.critical_threshold:
return (2, "CRITICAL")
elif self.warning_threshold and value >= self.warning_threshold:
return (1, "WARNING")
else:
return (0, "OK")
def print_result(self, status_type, status_string, **metrics):
print(f"{self.TYPE} {status_type} - {status_string}")
for (metric_name, metric_value) in sorted(metrics.items()):
print(f"| '{metric_name}' = {metric_value:.2f}s")
+
+ def collect_prometheus_metric(
+ self, name: str, value: float, labels: List[str] = []
+ ):
+ g = self.prometheus_metrics.get(self.PROMETHEUS_METRICS_BASENAME + name)
+
+ if g is None:
+ raise ValueError(f"No metric {name} found")
+
+ g.labels(*self._get_label_values(labels)).set(value)
+
+ def _get_label_values(self, labels: List[str]) -> List[str]:
+ label_list = []
+
+ if self.environment:
+ label_list.append(self.environment)
+
+ if self.application is None:
+ raise ValueError("Application name must be specified")
+ label_list.append(self.application)
+
+ return label_list + labels
+
+ def _get_label_names(self, values: List[str] = []) -> List[str]:
+ full_list = []
+
+ if self.environment:
+        full_list.append("environment")
+ full_list.append("application")
+
+ full_list += values
+
+ return full_list
+
+ def register_prometheus_summary(
+ self, name: str, unit: str, labels: List[str] = []
+ ) -> None:
+ full_name = self.PROMETHEUS_METRICS_BASENAME + name
+
+ self.prometheus_metrics[full_name] = Summary(
+ full_name,
+ "",
+ registry=self.registry,
+ unit=unit,
+ labelnames=self._get_label_names(labels),
+ )
+
+ def register_prometheus_gauge(
+ self, name: str, unit: str, labels: List[str] = []
+ ) -> None:
+ full_name = self.PROMETHEUS_METRICS_BASENAME + name
+
+ self.prometheus_metrics[full_name] = Gauge(
+ name=full_name,
+ documentation="",
+ registry=self.registry,
+ unit=unit,
+ labelnames=self._get_label_names(labels),
+ )
+
+ def save_prometheus_metrics(self) -> None:
+ """Dump on disk the .prom file containing the
+ metrics collected during the check execution.
+
+ It's a callback method triggered by the atexit
+ declared in the constructor."""
+ if self.prometheus_enabled:
+ assert self.prometheus_exporter_directory is not None
+
+ filename = (
+ self.prometheus_exporter_directory + "/" + self.application + ".prom"
+ )
+ write_to_textfile(filename, self.registry)
diff --git a/swh/icinga_plugins/cli.py b/swh/icinga_plugins/cli.py
index fd920c0..9ee8bd0 100644
--- a/swh/icinga_plugins/cli.py
+++ b/swh/icinga_plugins/cli.py
@@ -1,146 +1,164 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
import sys
import click
from swh.core.cli import CONTEXT_SETTINGS
from swh.core.cli import swh as swh_cli_group
@swh_cli_group.group(name="icinga_plugins", context_settings=CONTEXT_SETTINGS)
@click.option("-w", "--warning", type=int, help="Warning threshold.")
@click.option("-c", "--critical", type=int, help="Critical threshold.")
+@click.option("--prometheus-exporter/--no-prometheus-exporter", default=False)
+@click.option(
+ "--prometheus-exporter-directory",
+ type=str,
+ default="/var/lib/prometheus/node-exporter",
+)
+@click.option("--environment", type=str, help="The tested environment")
@click.pass_context
-def icinga_cli_group(ctx, warning, critical):
+def icinga_cli_group(
+ ctx,
+ warning,
+ critical,
+ prometheus_exporter: bool,
+ prometheus_exporter_directory: str,
+ environment: str,
+):
"""Main command for Icinga plugins"""
ctx.ensure_object(dict)
if warning:
ctx.obj["warning_threshold"] = int(warning)
if critical:
ctx.obj["critical_threshold"] = int(critical)
+ ctx.obj["prometheus_enabled"] = prometheus_exporter
+ ctx.obj["prometheus_exporter_directory"] = prometheus_exporter_directory
+ ctx.obj["environment"] = environment
+
@icinga_cli_group.group(name="check-vault")
@click.option(
"--swh-storage-url", type=str, required=True, help="URL to an swh-storage HTTP API"
)
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for cooking status.",
)
@click.pass_context
def check_vault(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_vault.command(name="directory")
@click.pass_context
def check_vault_directory(ctx):
"""Picks a random directory, requests its cooking via swh-web,
and waits for completion."""
from .vault import VaultCheck
sys.exit(VaultCheck(ctx.obj).main())
@icinga_cli_group.group(name="check-savecodenow")
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for save code now status.",
)
@click.pass_context
def check_scn(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_scn.command(name="origin")
@click.argument("origin", type=str)
@click.option("--visit-type", type=str, required=True, help="Visit type for origin")
@click.pass_context
def check_scn_origin(ctx, origin, visit_type):
"""Requests a save code now via the api for a given origin with type visit_type, waits
for its completion, report approximate time of completion (failed or succeeded) and
warn if threshold exceeded.
"""
from .save_code_now import SaveCodeNowCheck
sys.exit(SaveCodeNowCheck(ctx.obj, origin, visit_type).main())
@icinga_cli_group.group(name="check-deposit")
@click.option(
"--server",
type=str,
default="https://deposit.softwareheritage.org/1",
help="URL to the SWORD server to test",
)
@click.option(
"--provider-url",
type=str,
required=True,
help=(
"Root URL of the deposit client, as defined in the "
"'deposit_client.provider_url' column in the deposit DB"
),
)
@click.option("--username", type=str, required=True, help="Login for the SWORD server")
@click.option(
"--password", type=str, required=True, help="Password for the SWORD server"
)
@click.option(
"--collection",
type=str,
required=True,
help="Software collection to use on the SWORD server",
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for ingestion status.",
)
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.pass_context
def check_deposit(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_deposit.command(name="single")
@click.option(
"--archive", type=click.Path(), required=True, help="Software artefact to upload"
)
@click.option(
"--metadata",
type=click.Path(),
required=True,
help="Metadata file for the software artefact.",
)
@click.pass_context
def check_deposit_single(ctx, **kwargs):
"""Checks the provided archive and metadata file and be deposited."""
from .deposit import DepositCheck
ctx.obj.update(kwargs)
sys.exit(DepositCheck(ctx.obj).main())
diff --git a/swh/icinga_plugins/deposit.py b/swh/icinga_plugins/deposit.py
index db680af..863c4a9 100644
--- a/swh/icinga_plugins/deposit.py
+++ b/swh/icinga_plugins/deposit.py
@@ -1,262 +1,307 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import sys
import time
from typing import Any, Dict, Optional
import requests
from swh.deposit.client import PublicApiDepositClient
from .base_check import BaseCheck
class DepositCheck(BaseCheck):
TYPE = "DEPOSIT"
DEFAULT_WARNING_THRESHOLD = 120
DEFAULT_CRITICAL_THRESHOLD = 3600
def __init__(self, obj):
- super().__init__(obj)
+ super().__init__(obj, application="deposit")
self.api_url = obj["swh_web_url"].rstrip("/")
self._poll_interval = obj["poll_interval"]
self._archive_path = obj["archive"]
self._metadata_path = obj["metadata"]
self._collection = obj["collection"]
self._slug: Optional[str] = None
self._provider_url = obj["provider_url"]
self._client = PublicApiDepositClient(
{
"url": obj["server"],
"auth": {"username": obj["username"], "password": obj["password"]},
}
)
+ self.register_prometheus_gauge("duration", "seconds", ["step", "status"])
+ self.register_prometheus_gauge("status", "")
+
def upload_deposit(self):
slug = (
"check-deposit-%s"
% datetime.datetime.fromtimestamp(time.time()).isoformat()
)
result = self._client.deposit_create(
archive=self._archive_path,
metadata=self._metadata_path,
collection=self._collection,
in_progress=False,
slug=slug,
)
self._slug = slug
self._deposit_id = result["deposit_id"]
return result
def update_deposit_with_metadata(self) -> Dict[str, Any]:
"""Trigger a metadata update on the deposit once it's completed."""
deposit = self.get_deposit_status()
swhid = deposit["deposit_swh_id"]
assert deposit["deposit_id"] == self._deposit_id
# We can reuse the initial metadata file we already sent
return self._client.deposit_update(
self._collection,
self._deposit_id,
self._slug,
metadata=self._metadata_path,
swhid=swhid,
)
def get_deposit_status(self):
return self._client.deposit_status(
collection=self._collection, deposit_id=self._deposit_id
)
def wait_while_status(self, statuses, start_time, metrics, result):
while result["deposit_status"] in statuses:
metrics["total_time"] = time.time() - start_time
if metrics["total_time"] > self.critical_threshold:
self.print_result(
"CRITICAL",
f"Timed out while in status "
f'{result["deposit_status"]} '
f'({metrics["total_time"]}s seconds since deposit '
f"started)",
**metrics,
)
+
+ self.collect_prometheus_metric(
+ "duration",
+ metrics["total_time"],
+ [result["deposit_status"], "timeout"],
+ )
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["", "timeout"]
+ )
+ self.collect_prometheus_metric("status", 2)
+
sys.exit(2)
time.sleep(self._poll_interval)
result = self.get_deposit_status()
return result
def main(self):
start_time = time.time()
start_datetime = datetime.datetime.fromtimestamp(
start_time, tz=datetime.timezone.utc
)
metrics = {}
# Upload the archive and metadata
result = self.upload_deposit()
metrics["upload_time"] = time.time() - start_time
# Wait for validation
result = self.wait_while_status(["deposited"], start_time, metrics, result)
metrics["total_time"] = time.time() - start_time
metrics["validation_time"] = metrics["total_time"] - metrics["upload_time"]
# Check validation succeeded
if result["deposit_status"] == "rejected":
self.print_result(
"CRITICAL",
f'Deposit was rejected: {result["deposit_status_detail"]}',
**metrics,
)
+ self.collect_prometheus_metric(
+ "duration", metrics["validation_time"], ["validation", "rejected"]
+ )
+ self.collect_prometheus_metric(
+                "duration", metrics["total_time"], ["total", "rejected"]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
+ self.collect_prometheus_metric(
+ "duration", metrics["validation_time"], ["validation", "ok"]
+ )
# Wait for loading
result = self.wait_while_status(
["verified", "loading"], start_time, metrics, result
)
metrics["total_time"] = time.time() - start_time
metrics["load_time"] = (
metrics["total_time"] - metrics["upload_time"] - metrics["validation_time"]
)
+ self.collect_prometheus_metric(
+ "duration", metrics["load_time"], ["loading", result["deposit_status"]]
+ )
# Check loading succeeded
if result["deposit_status"] == "failed":
self.print_result(
"CRITICAL",
f'Deposit loading failed: {result["deposit_status_detail"]}',
**metrics,
)
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["total", "failed"]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
# Check for unexpected status
if result["deposit_status"] != "done":
self.print_result(
"CRITICAL",
f'Deposit got unexpected status: {result["deposit_status"]} '
f'({result["deposit_status_detail"]})',
**metrics,
)
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["total", result["deposit_status"]]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
# Get the SWHID
if "deposit_swh_id" not in result:
# if the deposit succeeded immediately (which is rare), it does not
# contain the SWHID, so we need to re-fetch its status.
result = self.get_deposit_status()
if result.get("deposit_swh_id") is None:
self.print_result(
"CRITICAL",
f"'deposit_swh_id' missing from result: {result!r}",
**metrics,
)
return 2
swhid = result["deposit_swh_id"]
# Check for unexpected status
if result["deposit_status"] != "done":
self.print_result(
"CRITICAL",
f'Deposit status went from "done" to: {result["deposit_status"]} '
f'({result["deposit_status_detail"]})',
**metrics,
)
return 2
# Get metadata list from swh-web
response = requests.get(
f"{self.api_url}/api/1/raw-extrinsic-metadata/swhid/{swhid}/",
params={
"authority": f"deposit_client {self._provider_url}",
"after": start_datetime.isoformat(),
},
)
if response.status_code != 200:
self.print_result(
"CRITICAL",
f"Getting the list of metadata returned code {response.status_code}: "
f"{response.content!r}",
**metrics,
)
return 2
metadata_objects = response.json()
expected_origin = f"{self._provider_url}/{self._slug}"
# Filter out objects that were clearly not created by this deposit
relevant_metadata_objects = [
d for d in metadata_objects if d.get("origin") == expected_origin
]
if not relevant_metadata_objects:
self.print_result(
"CRITICAL",
f"No recent metadata on {swhid} with origin {expected_origin} in: "
f"{metadata_objects!r}",
**metrics,
)
return 2
# Check the metadata was loaded as-is
metadata_url = relevant_metadata_objects[0]["metadata_url"]
metadata_file = requests.get(metadata_url).content
with open(self._metadata_path, "rb") as fd:
expected_metadata_file = fd.read()
if metadata_file != expected_metadata_file:
self.print_result(
"CRITICAL",
f"Metadata on {swhid} with origin {expected_origin} "
f"(at {metadata_url}) differs from uploaded Atom document "
f"(at {self._metadata_path})",
**metrics,
)
return 2
# Everything went fine, check total time wasn't too large and
# print result
(status_code, status) = self.get_status(metrics["total_time"])
self.print_result(
status,
f'Deposit took {metrics["total_time"]:.2f}s and succeeded.',
**metrics,
)
if status_code != 0: # Stop if any problem in the initial scenario
+ self.collect_prometheus_metric("status", status_code)
return status_code
# Initial deposit is now completed, now we can update the deposit with metadata
result = self.update_deposit_with_metadata()
total_time = time.time() - start_time
metrics_update = {
"total_time": total_time,
"update_time": (
total_time
- metrics["upload_time"]
- metrics["validation_time"]
- metrics["load_time"]
),
}
if "error" in result:
self.print_result(
"CRITICAL",
f'Deposit Metadata update failed: {result["error"]} ',
**metrics_update,
)
+ self.collect_prometheus_metric(
+                "duration", metrics_update["total_time"], ["total", "metadata_error"]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
(status_code, status) = self.get_status(metrics_update["total_time"])
self.print_result(
status,
f'Deposit Metadata update took {metrics_update["update_time"]:.2f}s '
"and succeeded.",
**metrics_update,
)
+
+ self.collect_prometheus_metric(
+            "duration", metrics_update["total_time"], ["total", "done"]
+ )
+ self.collect_prometheus_metric("status", status_code)
return status_code
diff --git a/swh/icinga_plugins/save_code_now.py b/swh/icinga_plugins/save_code_now.py
index 131c080..1922225 100644
--- a/swh/icinga_plugins/save_code_now.py
+++ b/swh/icinga_plugins/save_code_now.py
@@ -1,113 +1,124 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import time
from typing import Dict, List
import requests
from .base_check import BaseCheck
REPORT_MSG = "Save code now request for origin"
WAITING_STATUSES = ("not yet scheduled", "running", "scheduled")
class SaveCodeNowCheck(BaseCheck):
TYPE = "SAVECODENOW"
DEFAULT_WARNING_THRESHOLD = 60
DEFAULT_CRITICAL_THRESHOLD = 120
def __init__(self, obj: Dict, origin: str, visit_type: str) -> None:
- super().__init__(obj)
+ super().__init__(obj, application="scn")
self.api_url = obj["swh_web_url"].rstrip("/")
self.poll_interval = obj["poll_interval"]
self.origin = origin
self.visit_type = visit_type
+ self.register_prometheus_gauge("duration", "seconds", ["status"])
+ self.register_prometheus_gauge("status", "")
+
@staticmethod
def api_url_scn(root_api_url: str, origin: str, visit_type: str) -> str:
"""Compute the save code now api url for a given origin"""
return f"{root_api_url}/api/1/origin/save/{visit_type}/url/{origin}/"
def main(self) -> int:
"""Scenario description:
1. Requests a save code now request via the api for origin self.origin with type
self.visit_type.
2. Polling regularly at self.poll_interval seconds the completion status.
3. When either succeeded, failed or threshold exceeded, report approximate time
of completion. This will warn if thresholds are exceeded.
"""
start_time: float = time.time()
total_time: float = 0.0
scn_url = self.api_url_scn(self.api_url, self.origin, self.visit_type)
response = requests.post(scn_url)
assert response.status_code == 200, (response, response.text)
result: Dict = response.json()
status_key = "save_task_status"
request_date = result["save_request_date"]
origin_info = (self.visit_type, self.origin)
while result[status_key] in WAITING_STATUSES:
time.sleep(self.poll_interval)
response = requests.get(scn_url)
assert (
response.status_code == 200
), f"Unexpected response: {response}, {response.text}"
raw_result: List[Dict] = response.json()
assert len(raw_result) > 0, f"Unexpected result: {raw_result}"
if len(raw_result) > 1:
# retrieve only the one status result we are interested in
result = next(
filter(lambda r: r["save_request_date"] == request_date, raw_result)
)
else:
result = raw_result[0]
# this because the api can return multiple entries for the same origin
assert result["save_request_date"] == request_date
total_time = time.time() - start_time
if total_time > self.critical_threshold:
self.print_result(
"CRITICAL",
f"{REPORT_MSG} {origin_info} took more than {total_time:.2f}s "
f'and has status: {result["save_task_status"]}.',
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["timeout"])
+ self.collect_prometheus_metric("status", 2)
return 2
if result[status_key] == "succeeded":
(status_code, status) = self.get_status(total_time)
self.print_result(
status,
f"{REPORT_MSG} {origin_info} took {total_time:.2f}s and succeeded.",
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["succeeded"])
+ self.collect_prometheus_metric("status", status_code)
return status_code
elif result[status_key] == "failed":
self.print_result(
"CRITICAL",
f"{REPORT_MSG} {origin_info} took {total_time:.2f}s and failed.",
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["failed"])
+ self.collect_prometheus_metric("status", 2)
return 2
else:
self.print_result(
"CRITICAL",
f"{REPORT_MSG} {origin_info} took {total_time:.2f}s "
"and resulted in unsupported status: "
f"{result['save_request_status']} ; {result[status_key]}.",
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["failed"])
+ self.collect_prometheus_metric("status", 2)
return 2
diff --git a/swh/icinga_plugins/tests/test_base_check.py b/swh/icinga_plugins/tests/test_base_check.py
new file mode 100644
index 0000000..fe25dd3
--- /dev/null
+++ b/swh/icinga_plugins/tests/test_base_check.py
@@ -0,0 +1,57 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.icinga_plugins.base_check import BaseCheck
+
+
+def test_inexistent_metric():
+ base_check = BaseCheck({}, "test")
+
+ with pytest.raises(ValueError, match="No metric unknown found"):
+ base_check.collect_prometheus_metric("unknown", 10, [])
+
+
+def test_environment():
+ base_check = BaseCheck({"environment": "pytest"}, "test")
+
+ with pytest.raises(ValueError, match="No metric unknown found"):
+ base_check.collect_prometheus_metric("unknown", 10, [])
+
+
+def test_application_not_defined():
+ base_check = BaseCheck({"environment": "pytest"}, "test")
+ base_check.register_prometheus_gauge("gauge", "seconds")
+ base_check.application = None
+
+ with pytest.raises(ValueError, match="Application name must be specified"):
+ base_check.collect_prometheus_metric("gauge", 10, [])
+
+
+def test_save_without_directory(tmpdir):
+ config = {
+ "prometheus_enabled": True,
+ }
+
+ base_check = BaseCheck(config, "test")
+
+ with pytest.raises(AssertionError):
+ base_check.save_prometheus_metrics()
+
+
+def test_save(tmpdir):
+ application = "my_application"
+ config = {
+ "prometheus_enabled": True,
+ "prometheus_exporter_directory": tmpdir.strpath,
+ }
+
+ base_check = BaseCheck(config, application)
+ base_check.register_prometheus_gauge("gauge", "count")
+ base_check.collect_prometheus_metric("gauge", 10)
+ base_check.save_prometheus_metrics()
+
+ assert f"{tmpdir.strpath}/{application}.prom" in tmpdir.listdir()
diff --git a/swh/icinga_plugins/tests/test_deposit.py b/swh/icinga_plugins/tests/test_deposit.py
index 64404f2..18c472f 100644
--- a/swh/icinga_plugins/tests/test_deposit.py
+++ b/swh/icinga_plugins/tests/test_deposit.py
@@ -1,991 +1,1018 @@
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import io
import os
import tarfile
import time
from typing import Optional
import pytest
from swh.icinga_plugins.tests.utils import invoke
from .web_scenario import WebScenario
POLL_INTERVAL = 10
BASE_URL = "http://swh-deposit.example.org/1"
BASE_WEB_URL = "http+mock://swh-web.example.org"
PROVIDER_URL = "http://icinga-checker.example.org"
COMMON_OPTIONS = [
"--server",
BASE_URL,
"--username",
"test",
"--password",
"test",
"--collection",
"testcol",
"--swh-web-url",
BASE_WEB_URL,
"--provider-url",
PROVIDER_URL,
]
SAMPLE_METADATA = """
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
<title>Test Software</title>
<client>swh</client>
<external_identifier>test-software</external_identifier>
<codemeta:author>
<codemeta:name>No One</codemeta:name>
</codemeta:author>
</entry>
"""
ENTRY_TEMPLATE = """
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:sword="http://purl.org/net/sword/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:dcterms="http://purl.org/dc/terms/">
<swh:deposit_id>42</swh:deposit_id>
<swh:deposit_date>2019-12-19 18:11:00</swh:deposit_date>
<swh:deposit_archive>foo.tar.gz</swh:deposit_archive>
<swh:deposit_status>{status}</swh:deposit_status>
<sword:packaging>http://purl.org/net/sword/package/SimpleZip</sword:packaging>
</entry>
"""
STATUS_TEMPLATE = """
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:sword="http://purl.org/net/sword/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:dcterms="http://purl.org/dc/terms/">
<swh:deposit_id>42</swh:deposit_id>
<swh:deposit_status>{status}</swh:deposit_status>
<swh:deposit_status_detail>{status_detail}</swh:deposit_status_detail>%s
</entry>
"""
def compute_origin():
# This is the same origin the checker would compute, because we mock time.time
# to be constant until time.sleep is called
return (
PROVIDER_URL
+ "/check-deposit-%s" % datetime.datetime.fromtimestamp(time.time()).isoformat()
)
def status_template(
status: str, status_detail: str = "", swhid: Optional[str] = None
) -> str:
"""Generate a proper status template out of status, status_detail and optional swhid"""
if swhid is not None:
template = (
STATUS_TEMPLATE % f"\n <swh:deposit_swh_id>{swhid}</swh:deposit_swh_id>"
)
return template.format(status=status, status_detail=status_detail, swhid=swhid)
template = STATUS_TEMPLATE % ""
return template.format(status=status, status_detail=status_detail)
def test_status_template():
actual_status = status_template(status="deposited")
assert (
actual_status
== """
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:sword="http://purl.org/net/sword/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:dcterms="http://purl.org/dc/terms/">
<swh:deposit_id>42</swh:deposit_id>
<swh:deposit_status>deposited</swh:deposit_status>
<swh:deposit_status_detail></swh:deposit_status_detail>
</entry>
"""
)
actual_status = status_template(status="verified", status_detail="detail")
assert (
actual_status
== """
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:sword="http://purl.org/net/sword/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:dcterms="http://purl.org/dc/terms/">
<swh:deposit_id>42</swh:deposit_id>
<swh:deposit_status>verified</swh:deposit_status>
<swh:deposit_status_detail>detail</swh:deposit_status_detail>
</entry>
"""
)
actual_status = status_template(
status="done", swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
)
assert (
actual_status
== """
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:sword="http://purl.org/net/sword/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:dcterms="http://purl.org/dc/terms/">
<swh:deposit_id>42</swh:deposit_id>
<swh:deposit_status>done</swh:deposit_status>
<swh:deposit_status_detail></swh:deposit_status_detail>
<swh:deposit_swh_id>swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74</swh:deposit_swh_id>
</entry>
"""
)
@pytest.fixture(scope="session")
def tmp_path(tmp_path_factory):
return tmp_path_factory.mktemp(__name__)
@pytest.fixture(scope="session")
def sample_metadata(tmp_path):
"""Returns a sample metadata file's path"""
path = os.path.join(tmp_path, "metadata.xml")
with open(path, "w") as fd:
fd.write(SAMPLE_METADATA)
return path
@pytest.fixture(scope="session")
def sample_archive(tmp_path):
"""Returns a sample archive's path"""
path = os.path.join(tmp_path, "archive.tar.gz")
with tarfile.open(path, "w:gz") as tf:
tf.addfile(tarfile.TarInfo("hello.py"), io.BytesIO(b'print("Hello world")'))
return path
def test_deposit_immediate_success(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
"""Both deposit creation and deposit metadata update passed without delays"""
origin = compute_origin()
scenario = WebScenario()
status_xml = status_template(
status="done",
status_detail="",
swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
)
# Initial deposit
scenario.add_step(
"post",
f"{BASE_URL}/testcol/",
ENTRY_TEMPLATE.format(status="done"),
)
# Checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
status_xml = status_template(
status="done",
status_detail="",
swhid=swhid,
)
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# Then the checker checks the metadata appeared on the website
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
[
{
"swhid": swhid,
"origin": origin,
"discovery_date": "2999-03-03T10:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
}
],
)
scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
# Then metadata update
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# internal deposit client does call status, then update metadata then status api
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_xml,
)
scenario.add_step(
"put",
f"{BASE_URL}/testcol/42/atom/",
status_xml,
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_xml,
)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
]
)
assert result.output == (
"DEPOSIT OK - Deposit took 0.00s and succeeded.\n"
"| 'load_time' = 0.00s\n"
"| 'total_time' = 0.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 0.00s\n"
"DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n"
"| 'total_time' = 0.00s\n"
"| 'update_time' = 0.00s\n"
)
assert result.exit_code == 0, f"Unexpected output: {result.output}"
def test_deposit_delays(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """Deposit creation passed with some delays, deposit metadata update passed without
    delay
    """
    origin = compute_origin()
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="loading"),
    )
    # Deposit done, checker gets the SWHID
    swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
    status_xml = status_template(
        status="done",
        status_detail="",
        swhid=swhid,
    )
    scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
    # Then the checker checks the metadata appeared on the website
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
        f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
        f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
        [
            {
                "swhid": swhid,
                "origin": origin,
                "discovery_date": "2999-03-03T10:48:47+00:00",
                "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
            }
        ],
    )
    scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
    # Then metadata update
    scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
    # internal deposit client does call status, then update metadata then status api
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_xml,
    )
    scenario.add_step(
        "put",
        f"{BASE_URL}/testcol/42/atom/",
        status_xml,
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_xml,
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            # also exercise the prometheus textfile-exporter code path
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ]
    )
    assert result.output == (
        "DEPOSIT OK - Deposit took 30.00s and succeeded.\n"
        "| 'load_time' = 20.00s\n"
        "| 'total_time' = 30.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
        "DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n"
        "| 'total_time' = 30.00s\n"
        "| 'update_time' = 0.00s\n"
    )
    assert result.exit_code == 0, f"Unexpected output: {result.output}"
def test_deposit_then_metadata_update_failed(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """Deposit creation passed, deposit metadata update failed"""
    origin = compute_origin()
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="loading"),
    )
    # Deposit done, checker gets the SWHID
    swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
    status_xml = status_template(
        status="done",
        status_detail="",
        swhid=swhid,
    )
    scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
    # Then the checker checks the metadata appeared on the website
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
        f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
        f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
        [
            {
                "swhid": swhid,
                "origin": origin,
                "discovery_date": "2999-03-03T10:48:47+00:00",
                "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
            }
        ],
    )
    scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
    # Then metadata update calls: the deposit is no longer in status "done",
    # which makes the metadata update impossible
    failed_status_xml = status_template(
        status="failed",  # lying here
        status_detail="Failure to ingest",
        swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
    )
    scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml)
    scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml)
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT OK - Deposit took 30.00s and succeeded.\n"
        "| 'load_time' = 20.00s\n"
        "| 'total_time' = 30.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
        "DEPOSIT CRITICAL - Deposit Metadata update failed: You can only update "
        "metadata on deposit with status 'done' \n"
        "| 'total_time' = 30.00s\n"
        "| 'update_time' = 0.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_delay_warning(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """Deposit creation exceeded delays, no deposit update occurred."""
    origin = compute_origin()
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    # Deposit done, checker gets the SWHID
    swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
    status_xml = status_template(
        status="done",
        status_detail="",
        swhid=swhid,
    )
    scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
    # Then the checker checks the metadata appeared on the website
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
        f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
        f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
        [
            {
                "swhid": swhid,
                "origin": origin,
                "discovery_date": "2999-03-03T10:48:47+00:00",
                "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
            }
        ],
    )
    scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            # lower the warning threshold below the 20s the scenario takes
            "--warning",
            "15",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT WARNING - Deposit took 20.00s and succeeded.\n"
        "| 'load_time' = 10.00s\n"
        "| 'total_time' = 20.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 1, f"Unexpected output: {result.output}"
def test_deposit_delay_critical(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """Deposit creation exceeded the critical threshold."""
    origin = compute_origin()
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    # Deposit done, checker gets the SWHID
    swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
    status_xml = status_template(
        status="done",
        status_detail="",
        swhid=swhid,
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_xml,
        # simulate a slow response so the total duration passes the threshold
        callback=lambda: time.sleep(60),
    )
    # Then the checker checks the metadata appeared on the website
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
        f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
        f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
        [
            {
                "swhid": swhid,
                "origin": origin,
                "discovery_date": "2999-03-03T10:48:47+00:00",
                "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
            }
        ],
    )
    scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "--critical",
            "50",
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT CRITICAL - Deposit took 80.00s and succeeded.\n"
        "| 'load_time' = 70.00s\n"
        "| 'total_time' = 80.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_timeout(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """Every step is so slow the checker gives up while the deposit is loading."""
    scenario = WebScenario()
    scenario.add_step(
        "post",
        f"{BASE_URL}/testcol/",
        ENTRY_TEMPLATE.format(status="deposited"),
        callback=lambda: time.sleep(1500),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
        callback=lambda: time.sleep(1500),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="loading"),
        callback=lambda: time.sleep(1500),
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT CRITICAL - Timed out while in status loading "
        "(4520.0s seconds since deposit started)\n"
        "| 'total_time' = 4520.00s\n"
        "| 'upload_time' = 1500.00s\n"
        "| 'validation_time' = 1510.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_metadata_missing(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """Deposit succeeds, but the expected metadata never appears on the website
    (the only entry has the wrong origin), so the check reports CRITICAL."""
    origin = compute_origin()
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    # Deposit done, checker gets the SWHID
    swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
    status_xml = status_template(
        status="done",
        status_detail="",
        swhid=swhid,
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_xml,
    )
    # Then the checker checks the metadata appeared on the website
    metadata_list = [
        {
            # Filtered out, because wrong origin
            "swhid": swhid,
            "origin": "http://wrong-origin.example.org",
            "discovery_date": "2999-03-03T10:48:47+00:00",
            "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
        },
    ]
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
        f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
        f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
        metadata_list,
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"DEPOSIT CRITICAL - No recent metadata on {swhid} with origin {origin} in: "
        f"{metadata_list!r}\n"
        "| 'load_time' = 10.00s\n"
        "| 'total_time' = 20.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_metadata_error(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """Deposit succeeds, but listing the raw-extrinsic-metadata returns an
    HTTP 400, so the check reports CRITICAL with the response body."""
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    # Deposit done, checker gets the SWHID
    swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
    status_xml = status_template(
        status="done",
        status_detail="",
        swhid=swhid,
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_xml,
    )
    # Then the checker checks the metadata appeared on the website
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
        f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
        f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
        "foo\nbar",
        status_code=400,
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT CRITICAL - Getting the list of metadata returned code 400: "
        "b'foo\\nbar'\n"
        "| 'load_time' = 10.00s\n"
        "| 'total_time' = 20.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_metadata_corrupt(
    requests_mock, mocker, mocked_time, sample_archive, sample_metadata
):
    """Deposit succeeds, but the metadata served back by the website differs
    from the uploaded Atom document, so the check reports CRITICAL."""
    origin = compute_origin()
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    # Deposit done, checker gets the SWHID
    swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
    status_xml = status_template(
        status="done",
        status_detail="",
        swhid=swhid,
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_xml,
    )
    # Then the checker checks the metadata appeared on the website
    metadata_list = [
        {
            "swhid": swhid,
            "origin": origin,
            "discovery_date": "2999-03-03T09:48:47+00:00",
            "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
        },
    ]
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
        f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
        f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
        metadata_list,
    )
    scenario.add_step(
        "get",
        f"{BASE_WEB_URL}/the-metadata-url",
        SAMPLE_METADATA[0:-1],  # corrupting the metadata by dropping the last byte
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"DEPOSIT CRITICAL - Metadata on {swhid} with origin {origin} (at "
        f"{BASE_WEB_URL}/the-metadata-url) differs from uploaded Atom document (at "
        f"{sample_metadata})\n"
        "| 'load_time' = 10.00s\n"
        "| 'total_time' = 20.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_rejected(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """A rejected deposit must be reported as CRITICAL with its detail."""
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="rejected", status_detail="booo"),
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT CRITICAL - Deposit was rejected: booo\n"
        "| 'total_time' = 10.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_failed(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """A deposit whose loading fails must be reported as CRITICAL."""
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="loading"),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="failed", status_detail="booo"),
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT CRITICAL - Deposit loading failed: booo\n"
        "| 'load_time' = 20.00s\n"
        "| 'total_time' = 30.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_unexpected_status(
    requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
    """A deposit reaching an unknown status must be reported as CRITICAL."""
    scenario = WebScenario()
    scenario.add_step(
        "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="verified"),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="loading"),
    )
    scenario.add_step(
        "get",
        f"{BASE_URL}/testcol/42/status/",
        status_template(status="what", status_detail="booo"),
    )
    scenario.install_mock(requests_mock)
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-deposit",
            *COMMON_OPTIONS,
            "single",
            "--archive",
            sample_archive,
            "--metadata",
            sample_metadata,
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        "DEPOSIT CRITICAL - Deposit got unexpected status: what (booo)\n"
        "| 'load_time' = 20.00s\n"
        "| 'total_time' = 30.00s\n"
        "| 'upload_time' = 0.00s\n"
        "| 'validation_time' = 10.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
diff --git a/swh/icinga_plugins/tests/test_save_code_now.py b/swh/icinga_plugins/tests/test_save_code_now.py
index 1053c94..03d72d6 100644
--- a/swh/icinga_plugins/tests/test_save_code_now.py
+++ b/swh/icinga_plugins/tests/test_save_code_now.py
@@ -1,256 +1,259 @@
# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timezone
import random
from typing import Dict, Optional, Tuple
import pytest
from swh.icinga_plugins.save_code_now import (
REPORT_MSG,
WAITING_STATUSES,
SaveCodeNowCheck,
)
from .utils import invoke
from .web_scenario import WebScenario
def fake_response(
    origin: str,
    visit_type: str,
    sor_status: str = "pending",
    task_status: Optional[str] = None,
) -> Dict:
    """Build a fake save-code-now API response payload.

    The visit date is only filled in once the loading task reached a
    terminal state ("failed" or "succeeded"); otherwise it stays None.
    """
    if task_status in ("failed", "succeeded"):
        visit_date: Optional[str] = str(datetime.now(tz=timezone.utc))
    else:
        visit_date = None
    response = {
        "visit_type": visit_type,
        "origin_url": origin,
        "save_request_date": "to-replace",
        "save_request_status": sor_status,
        "save_task_status": task_status,
        "visit_date": visit_date,
    }
    return response
@pytest.fixture
def origin_info() -> Tuple[str, str]:
    """Return a random (visit_type, origin_url) pair for save-code-now requests."""
    suffix = random.choice(range(10))
    visit_type = random.choice(["git", "svn", "hg"])
    return visit_type, f"mock://fake-origin-url/{suffix}"
def test_save_code_now_success(requests_mock, mocker, mocked_time, origin_info):
    """Successful ingestion scenario below threshold"""
    scenario = WebScenario()
    visit_type, origin = origin_info
    root_api_url = "mock://swh-web.example.org"
    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
    # creation request
    scenario.add_step(
        "post",
        api_url,
        fake_response(origin, visit_type, "accepted", "not yet scheduled"),
    )
    response_scheduled = fake_response(origin, visit_type, "accepted", "scheduled")
    # status polling requests
    scenario.add_step("get", api_url, [response_scheduled])
    # sometimes we can have multiple response so we fake that here
    scenario.add_step("get", api_url, [response_scheduled, response_scheduled])
    scenario.add_step(
        "get", api_url, [fake_response(origin, visit_type, "accepted", "succeeded")]
    )
    scenario.install_mock(requests_mock)
    # fmt: off
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory", "/tmp",
            "check-savecodenow", "--swh-web-url", root_api_url,
            "origin", origin,
            "--visit-type", visit_type,
        ]
    )
    # fmt: on
    assert result.output == (
        f"{SaveCodeNowCheck.TYPE} OK - {REPORT_MSG} {origin_info} took "
        f"30.00s and succeeded.\n"
        f"| 'total_time' = 30.00s\n"
    )
    assert result.exit_code == 0, f"Unexpected result: {result.output}"
def test_save_code_now_failure(requests_mock, mocker, mocked_time, origin_info):
    """Failed ingestion scenario should be reported"""
    scenario = WebScenario()
    visit_type, origin = origin_info
    root_api_url = "mock://swh-web.example.org"
    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
    # creation request
    scenario.add_step(
        "post",
        api_url,
        fake_response(origin, visit_type, "accepted", "not yet scheduled"),
    )
    # status polling requests: scheduled first, then the task fails
    scenario.add_step(
        "get", api_url, [fake_response(origin, visit_type, "accepted", "scheduled")]
    )
    scenario.add_step(
        "get", api_url, [fake_response(origin, visit_type, "accepted", "failed")]
    )
    scenario.install_mock(requests_mock)
    # fmt: off
    result = invoke(
        [
            "check-savecodenow", "--swh-web-url", root_api_url,
            "origin", origin,
            "--visit-type", visit_type,
        ],
        catch_exceptions=True,
    )
    # fmt: on
    assert result.output == (
        f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
        f"20.00s and failed.\n"
        f"| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected result: {result.output}"
def test_save_code_now_pending_state_unsupported(
    requests_mock, mocker, mocked_time, origin_info
):
    """Pending save requests are not supported in the test so they should fail early
    Pending requests are requests that need a moderator to accept the repository into
    the save code now flow.
    Do not actually use such origin to trigger the checks.
    """
    scenario = WebScenario()
    visit_type, origin = origin_info
    root_api_url = "mock://swh-web2.example.org"
    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
    # creation request: answered with the unsupported "pending" status
    scenario.add_step(
        "post",
        api_url,
        fake_response(origin, visit_type, "pending", "not created"),
    )
    scenario.install_mock(requests_mock)
    # fmt: off
    result = invoke(
        [
            "check-savecodenow", "--swh-web-url", root_api_url,
            "origin", origin,
            "--visit-type", visit_type,
        ],
        catch_exceptions=True,
    )
    # fmt: on
    assert result.output == (
        f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
        f"0.00s and resulted in unsupported status: pending ; not created.\n"
        f"| 'total_time' = 0.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_save_code_now_threshold_exceeded(
    requests_mock, mocker, mocked_time, origin_info
):
    """Saving requests exceeding threshold should mention warning in output"""
    scenario = WebScenario()
    visit_type, origin = origin_info
    root_api_url = "mock://swh-web2.example.org"
    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
    # creation request
    scenario.add_step(
        "post",
        api_url,
        fake_response(origin, visit_type, "accepted", "not yet scheduled"),
    )
    # we'll make the response being in the awaiting status
    # beyond 13, this will exceed the threshold
    for _ in range(13):
        waiting_status = random.choice(WAITING_STATUSES)
        response_scheduled = fake_response(
            origin, visit_type, "accepted", waiting_status
        )
        scenario.add_step("get", api_url, [response_scheduled])
    scenario.install_mock(requests_mock)
    # fmt: off
    result = invoke(
        [
            "check-savecodenow",
            "--swh-web-url", root_api_url,
            "origin", origin,
            "--visit-type", visit_type,
        ],
        catch_exceptions=True,
    )
    # fmt: on
    # `waiting_status` is the status of the last polled (13th) response
    assert result.output == (
        f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
        f"more than 130.00s and has status: {waiting_status}.\n"
        f"| 'total_time' = 130.00s\n"
    )
    assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_save_code_now_unexpected_failure(
    requests_mock, mocker, mocked_time, origin_info
):
    """Unexpected failure if the webapi refuses to answer for example"""
    scenario = WebScenario()
    visit_type, origin = origin_info
    root_api_url = "mock://swh-web.example.org"
    api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
    # creation request
    scenario.add_step(
        "post",
        api_url,
        fake_response(origin, visit_type, "accepted", "not yet scheduled"),
    )
    # status polling requests
    scenario.add_step(
        "get", api_url, [fake_response(origin, visit_type, "accepted", "scheduled")]
    )
    # unexpected issue when communicating with the api
    scenario.add_step("get", api_url, {}, status_code=500)
    scenario.install_mock(requests_mock)
    # invoke() is called without catch_exceptions, so the unexpected 500
    # surfaces as an AssertionError out of the check run
    with pytest.raises(AssertionError):
        # fmt: off
        invoke(
            [
                "check-savecodenow", "--swh-web-url", root_api_url,
                "origin", origin,
                "--visit-type", visit_type,
            ],
        )
        # fmt: on
diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py
index 82f2e8f..07beb09 100644
--- a/swh/icinga_plugins/tests/test_vault.py
+++ b/swh/icinga_plugins/tests/test_vault.py
@@ -1,536 +1,551 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import io
import tarfile
import time
from swh.icinga_plugins.tests.utils import invoke
from .web_scenario import WebScenario
# Fixed fake directory id (40 hex chars, sha1-sized) used by every vault scenario.
DIR_ID = "ab" * 20
# Vault API endpoints the mocked swh-web serves for that directory.
url_api = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/"
url_fetch = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/raw/"
def _make_tarfile():
    """Build an in-memory .tar.gz laid out like a vault directory bundle."""
    fd = io.BytesIO()
    with tarfile.open(fileobj=fd, mode="w:gz") as tf:
        # NOTE(review): addfile() expects a file object as its second argument;
        # passing raw bytes only works because TarInfo.size defaults to 0, so
        # the README is actually added as an *empty* file — confirm intended.
        tf.addfile(tarfile.TarInfo(f"swh:1:dir:{DIR_ID}/README"), b"this is a readme\n")
        tarinfo = tarfile.TarInfo(f"swh:1:dir:{DIR_ID}")
        tarinfo.type = tarfile.DIRTYPE
        tf.addfile(tarinfo)
    return fd.getvalue()
# Tarball payload served by the mocked fetch endpoint in the tests below.
TARBALL = _make_tarfile()
# Canned vault API responses used by the scenarios below.
# Cooking still in progress.
response_pending = {
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "progress_message": "foo",
    "status": "pending",
}
# Cooking finished; bundle downloadable at fetch_url.
response_done = {
    "fetch_url": url_fetch,
    "id": 9,
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "status": "done",
}
# Cooking finished but the response lacks the fetch_url field.
response_done_no_fetch = {
    "id": 9,
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "status": "done",
}
# Cooking failed with a progress message.
response_failed = {
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "progress_message": "foobar",
    "status": "failed",
}
# Cooking returned a status the checker does not know about.
response_unknown_status = {
    "obj_id": DIR_ID,
    "obj_type": "directory",
    "progress_message": "what",
    "status": "boo",
}
class FakeStorage:
    """Minimal stand-in for the swh storage client used by the vault check."""
    def __init__(self, foo, **kwargs):
        # Accepts and ignores whatever get_storage() passes along.
        pass
    def directory_get_random(self):
        # Always hand back the same directory id the mocked vault API serves.
        return bytes.fromhex(DIR_ID)
def test_vault_immediate_success(requests_mock, mocker, mocked_time):
    """Cooking succeeds on the first status poll: the check reports OK."""
    scenario = WebScenario()
    # 404 first: the directory is not cached yet, so the check requests cooking
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
        "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ]
    )
    assert result.output == (
        f"VAULT OK - cooking directory {DIR_ID} took "
        f"10.00s and succeeded.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 0, result.output
def test_vault_delayed_success(requests_mock, mocker, mocked_time):
    """Cooking needs one extra pending poll before succeeding: still OK."""
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
        "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ]
    )
    assert result.output == (
        f"VAULT OK - cooking directory {DIR_ID} took "
        f"20.00s and succeeded.\n"
        f"| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 0, result.output
def test_vault_failure(requests_mock, mocker, mocked_time):
    """A failed cooking must be reported as CRITICAL with its progress message."""
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_failed)
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took "
        f"10.00s and failed with: foobar\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_unknown_status(requests_mock, mocker, mocked_time):
    """A cooking status the checker does not know must be reported as CRITICAL."""
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_unknown_status)
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took "
        f"10.00s and resulted in unknown status: boo\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_timeout(requests_mock, mocker, mocked_time):
    """Cooking stuck in pending long enough that the check times out: CRITICAL."""
    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_pending)
    scenario.add_step(
        "get", url_api, response_pending, callback=lambda: time.sleep(4000)
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took more than "
        f"4020.00s and has status: foo\n"
        f"| 'total_time' = 4020.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_cached_directory(requests_mock, mocker, mocked_time):
    """First serves a directory that's already in the cache, to
    test that vault_check requests another one."""
    scenario = WebScenario()
    # 200 on the first directory: already cached, the check must pick another
    scenario.add_step("get", url_api, {}, status_code=200)
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
        "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
    )
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ]
    )
    assert result.output == (
        f"VAULT OK - cooking directory {DIR_ID} took "
        f"10.00s and succeeded.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 0, result.output
def test_vault_no_directory(requests_mock, mocker, mocked_time):
    """Tests with an empty storage"""
    scenario = WebScenario()
    scenario.install_mock(requests_mock)
    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage
    # empty archive: no random directory can be picked for cooking
    mocker.patch(f"{__name__}.FakeStorage.directory_get_random", return_value=None)
    result = invoke(
        [
            "--prometheus-exporter",
            "--prometheus-exporter-directory",
            "/tmp",
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )
    assert result.output == ("VAULT CRITICAL - No directory exists in the archive.\n")
    assert result.exit_code == 2, result.output
def test_vault_fetch_failed(requests_mock, mocker, mocked_time):
    """A bundle download failing with HTTP 500 must be reported as
    CRITICAL even though the cooking itself succeeded."""
    web_scenario = WebScenario()
    web_scenario.add_step("get", url_api, {}, status_code=404)
    web_scenario.add_step("post", url_api, response_pending)
    web_scenario.add_step("get", url_api, response_done)
    web_scenario.add_step(
        "get",
        url_fetch,
        "",
        status_code=500,
        headers={"Content-Type": "application/gzip"},
    )
    web_scenario.install_mock(requests_mock)

    storage_factory = mocker.patch("swh.icinga_plugins.vault.get_storage")
    storage_factory.side_effect = FakeStorage

    cli_args = [
        "check-vault",
        "--swh-web-url",
        "mock://swh-web.example.org",
        "--swh-storage-url",
        "foo://example.org",
        "directory",
    ]
    result = invoke(cli_args, catch_exceptions=True)

    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took "
        f"10.00s and succeeded, but fetch failed with status code 500.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_fetch_missing_content_type(requests_mock, mocker, mocked_time):
    """A bundle served without a Content-Type header must be reported
    as CRITICAL."""
    web_scenario = WebScenario()
    web_scenario.add_step("get", url_api, {}, status_code=404)
    web_scenario.add_step("post", url_api, response_pending)
    web_scenario.add_step("get", url_api, response_done)
    # No headers on the fetch step: Content-Type will be None.
    web_scenario.add_step("get", url_fetch, "")
    web_scenario.install_mock(requests_mock)

    storage_factory = mocker.patch("swh.icinga_plugins.vault.get_storage")
    storage_factory.side_effect = FakeStorage

    cli_args = [
        "check-vault",
        "--swh-web-url",
        "mock://swh-web.example.org",
        "--swh-storage-url",
        "foo://example.org",
        "directory",
    ]
    result = invoke(cli_args, catch_exceptions=True)

    assert result.output == (
        "VAULT CRITICAL - Unexpected Content-Type when downloading bundle: None\n"
        "| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_corrupt_tarball_gzip(requests_mock, mocker, mocked_time):
    """A fetch response that is not gzip data must be reported as a
    CRITICAL ReadError."""
    web_scenario = WebScenario()
    web_scenario.add_step("get", url_api, {}, status_code=404)
    web_scenario.add_step("post", url_api, response_pending)
    web_scenario.add_step("get", url_api, response_pending)
    web_scenario.add_step("get", url_api, response_done)
    web_scenario.add_step(
        "get",
        url_fetch,
        b"this-is-not-a-tarball",
        headers={"Content-Type": "application/gzip"},
    )
    web_scenario.install_mock(requests_mock)

    storage_factory = mocker.patch("swh.icinga_plugins.vault.get_storage")
    storage_factory.side_effect = FakeStorage

    cli_args = [
        "check-vault",
        "--swh-web-url",
        "mock://swh-web.example.org",
        "--swh-storage-url",
        "foo://example.org",
        "directory",
    ]
    result = invoke(cli_args, catch_exceptions=True)

    assert result.output == (
        "VAULT CRITICAL - ReadError while reading tarball: not a gzip file\n"
        "| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_corrupt_tarball_member(requests_mock, mocker, mocked_time):
    """A tarball whose first member is not rooted at the expected SWHID
    must be reported as CRITICAL."""
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w:gz") as tf:
        tf.addfile(tarfile.TarInfo("wrong_dir_name/README"), b"this is a readme\n")
    tarball = buffer.getvalue()

    web_scenario = WebScenario()
    web_scenario.add_step("get", url_api, {}, status_code=404)
    web_scenario.add_step("post", url_api, response_pending)
    web_scenario.add_step("get", url_api, response_pending)
    web_scenario.add_step("get", url_api, response_done)
    web_scenario.add_step(
        "get",
        url_fetch,
        tarball,
        headers={"Content-Type": "application/gzip"},
    )
    web_scenario.install_mock(requests_mock)

    storage_factory = mocker.patch("swh.icinga_plugins.vault.get_storage")
    storage_factory.side_effect = FakeStorage

    cli_args = [
        "check-vault",
        "--swh-web-url",
        "mock://swh-web.example.org",
        "--swh-storage-url",
        "foo://example.org",
        "directory",
    ]
    result = invoke(cli_args, catch_exceptions=True)

    assert result.output == (
        "VAULT CRITICAL - Unexpected member in tarball: wrong_dir_name/README\n"
        "| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_empty_tarball(requests_mock, mocker, mocked_time):
    """An empty (member-less) tarball makes the streaming tarfile reader
    try to seek backwards, which must be reported as a CRITICAL
    StreamError."""
    fd = io.BytesIO()
    with tarfile.open(fileobj=fd, mode="w:gz"):
        pass  # write no members: a valid but empty gzipped tarball
    tarball = fd.getvalue()

    scenario = WebScenario()
    scenario.add_step("get", url_api, {}, status_code=404)
    scenario.add_step("post", url_api, response_pending)
    scenario.add_step("get", url_api, response_pending)
    scenario.add_step("get", url_api, response_done)
    scenario.add_step(
        "get",
        url_fetch,
        tarball,
        headers={"Content-Type": "application/gzip"},
    )
    scenario.install_mock(requests_mock)

    get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
    get_storage_mock.side_effect = FakeStorage

    result = invoke(
        [
            "check-vault",
            "--swh-web-url",
            "mock://swh-web.example.org",
            "--swh-storage-url",
            "foo://example.org",
            "directory",
        ],
        catch_exceptions=True,
    )

    # This error message will need to be updated when
    # https://bugs.python.org/issue46922 is resolved.
    assert result.output == (
        "VAULT CRITICAL - StreamError while reading tarball (empty file?): "
        "seeking backwards is not allowed\n"
        "| 'total_time' = 20.00s\n"
    )
    assert result.exit_code == 2, result.output
def test_vault_no_fetch_url(requests_mock, mocker, mocked_time):
    """A 'done' cooking whose API response lacks a fetch_url must be
    reported as CRITICAL."""
    web_scenario = WebScenario()
    web_scenario.add_step("get", url_api, {}, status_code=404)
    web_scenario.add_step("post", url_api, response_pending)
    web_scenario.add_step("get", url_api, response_done_no_fetch)
    web_scenario.install_mock(requests_mock)

    storage_factory = mocker.patch("swh.icinga_plugins.vault.get_storage")
    storage_factory.side_effect = FakeStorage

    cli_args = [
        "check-vault",
        "--swh-web-url",
        "mock://swh-web.example.org",
        "--swh-storage-url",
        "foo://example.org",
        "directory",
    ]
    result = invoke(cli_args, catch_exceptions=True)

    assert result.output == (
        f"VAULT CRITICAL - cooking directory {DIR_ID} took 10.00s and succeeded, "
        f"but API response did not contain a fetch_url.\n"
        f"| 'total_time' = 10.00s\n"
    )
    assert result.exit_code == 2, result.output
diff --git a/swh/icinga_plugins/vault.py b/swh/icinga_plugins/vault.py
index 25d8693..8265920 100644
--- a/swh/icinga_plugins/vault.py
+++ b/swh/icinga_plugins/vault.py
@@ -1,174 +1,213 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import tarfile
import time
+from typing import List
import requests
from swh.storage import get_storage
from .base_check import BaseCheck
class NoDirectory(Exception):
    """Raised when the archive contains no directory to pick from."""
class VaultCheck(BaseCheck):
    """End-to-end Icinga check for the Vault service.

    Picks a random directory that is not yet cached by the Vault, asks
    the swh-web API to cook it, polls until the cooking finishes, then
    downloads the resulting bundle and sanity-checks it.
    """

    TYPE = "VAULT"
    DEFAULT_WARNING_THRESHOLD = 0
    DEFAULT_CRITICAL_THRESHOLD = 3600

    def __init__(self, obj):
        # "vault" is the application label used by BaseCheck for the
        # Prometheus metrics registered below.
        super().__init__(obj, application="vault")
        self._swh_storage = get_storage("remote", url=obj["swh_storage_url"])
        self._swh_web_url = obj["swh_web_url"]
        self._poll_interval = obj["poll_interval"]

        # Overall check status, plus per-step durations labelled with
        # the step name and its outcome.
        self.register_prometheus_gauge("status", "")
        self.register_prometheus_gauge("duration", "seconds", ["step", "status"])

    def _url_for_dir(self, dir_id):
        # swh-web API endpoint used both to request a cooking (POST)
        # and to poll its status (GET).
        return self._swh_web_url + f"/api/1/vault/directory/{dir_id.hex()}/"

    def _pick_directory(self):
        """Return a random directory id; raise NoDirectory if the
        archive is empty."""
        dir_ = self._swh_storage.directory_get_random()
        if dir_ is None:
            raise NoDirectory()
        return dir_

    def _pick_uncached_directory(self):
        # Loop until we find a directory the Vault has not cooked yet
        # (the API answers 404 for unknown bundles), so the check
        # measures a real cooking rather than a cache hit.
        while True:
            dir_id = self._pick_directory()
            response = requests.get(self._url_for_dir(dir_id))
            if response.status_code == 404:
                return dir_id

    def _collect_prometheus_metrics(
        self, status: int, duration: float, labels: List[str]
    ) -> None:
        # `status` is the Icinga exit code (0/1/2); `labels` is the
        # [step, outcome] pair for the duration gauge.
        self.collect_prometheus_metric("status", status)
        self.collect_prometheus_metric(
            "duration", duration, labels,
        )

    def main(self):
        """Run the check; return the Icinga exit code (0 OK, 2 CRITICAL)."""
        try:
            dir_id = self._pick_uncached_directory()
        except NoDirectory:
            self.print_result("CRITICAL", "No directory exists in the archive.")
            return 2
        start_time = time.time()
        total_time = 0
        # Request the cooking.
        response = requests.post(self._url_for_dir(dir_id))
        assert response.status_code == 200, (response, response.text)
        result = response.json()
        # Poll until the cooking leaves the "new"/"pending" states,
        # bailing out once the critical threshold is exceeded.
        while result["status"] in ("new", "pending"):
            time.sleep(self._poll_interval)
            response = requests.get(self._url_for_dir(dir_id))
            assert response.status_code == 200, (response, response.text)
            result = response.json()
            total_time = time.time() - start_time
            if total_time > self.critical_threshold:
                self.print_result(
                    "CRITICAL",
                    f"cooking directory {dir_id.hex()} took more than "
                    f"{total_time:.2f}s and has status: "
                    f'{result["progress_message"]}',
                    total_time=total_time,
                )

                self._collect_prometheus_metrics(2, total_time, ["cooking", "timeout"])

                return 2
        if result["status"] == "failed":
            self.print_result(
                "CRITICAL",
                f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                f'and failed with: {result["progress_message"]}',
                total_time=total_time,
            )

            self._collect_prometheus_metrics(2, total_time, ["cooking", "failed"])

            return 2
        elif result["status"] != "done":
            self.print_result(
                "CRITICAL",
                f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                f'and resulted in unknown status: {result["status"]}',
                total_time=total_time,
            )

            self._collect_prometheus_metrics(2, total_time, ["cooking", "unknown"])
            return 2
        # Cooking succeeded: the exit code now depends on how long it took.
        (status_code, status) = self.get_status(total_time)
        if "fetch_url" not in result:
            self.print_result(
                "CRITICAL",
                f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                f"and succeeded, but API response did not contain a fetch_url.",
                total_time=total_time,
            )
            self._collect_prometheus_metrics(2, total_time, ["fetch", "no_url"])
            return 2
        # Download the cooked bundle and check it looks like a gzipped
        # tarball rooted at the expected SWHID.
        with requests.get(result["fetch_url"], stream=True) as fetch_response:
            try:
                fetch_response.raise_for_status()
            except requests.HTTPError:
                self.print_result(
                    "CRITICAL",
                    f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
                    f"and succeeded, but fetch failed with status code "
                    f"{fetch_response.status_code}.",
                    total_time=total_time,
                )
                self._collect_prometheus_metrics(2, total_time, ["fetch", "error"])
                return 2
            content_type = fetch_response.headers.get("Content-Type")
            if content_type != "application/gzip":
                self.print_result(
                    "CRITICAL",
                    f"Unexpected Content-Type when downloading bundle: {content_type}",
                    total_time=total_time,
                )
                self._collect_prometheus_metrics(
                    2, total_time, ["download", "unexpected_content_type"]
                )
                return 2
            try:
                with tarfile.open(fileobj=fetch_response.raw, mode="r|gz") as tf:
                    # Note that we are streaming the tarfile from the network,
                    # so we are allowed at most one pass on the tf object;
                    # and the sooner we close it the better.
                    # Fortunately, checking only the first member is good enough:
                    tarinfo = tf.next()
                    swhid = f"swh:1:dir:{dir_id.hex()}"
                    if tarinfo.name != swhid and not tarinfo.name.startswith(
                        f"{swhid}/"
                    ):
                        self.print_result(
                            "CRITICAL",
                            f"Unexpected member in tarball: {tarinfo.name}",
                            total_time=total_time,
                        )
                        self._collect_prometheus_metrics(
                            2, total_time, ["check", "archive_content"]
                        )
                        return 2
            except tarfile.ReadError as e:
                self.print_result(
                    "CRITICAL",
                    f"ReadError while reading tarball: {e}",
                    total_time=total_time,
                )
                self._collect_prometheus_metrics(
                    2, total_time, ["check", "archive_content"]
                )
                return 2
            except tarfile.StreamError as e:
                if e.args[0] == "seeking backwards is not allowed":
                    # Probably https://bugs.python.org/issue46922
                    self.print_result(
                        "CRITICAL",
                        f"StreamError while reading tarball (empty file?): {e}",
                        total_time=total_time,
                    )
                    self._collect_prometheus_metrics(
                        2, total_time, ["check", "archive_content"]
                    )
                    return 2
                self.print_result(
                    "CRITICAL",
                    f"StreamError while reading tarball: {e}",
                    total_time=total_time,
                )
                self._collect_prometheus_metrics(
                    2, total_time, ["check", "archive_content"]
                )
                return 2
        self.print_result(
            status,
            f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
            f"and succeeded.",
            total_time=total_time,
        )

        self._collect_prometheus_metrics(status_code, total_time, ["end", ""])
        return status_code
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Jul 3, 10:38 AM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3254657
Attached To
rDICP Icinga plugins
Event Timeline
Log In to Comment