diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000..73145d3
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+# python: Reformat code with black 22.3.0
+cb62ef1b400559657f9211650f5fab2063d8578e
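Git only consults this file when configured to: with git >= 2.23, `git config blame.ignoreRevsFile .git-blame-ignore-revs` enables it for a clone, and `git blame --ignore-revs-file .git-blame-ignore-revs <file>` works one-off, so the pure-reformatting commit listed above no longer shows up as the last author of every reformatted line.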
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 05398bb..1c95e3d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,42 +1,40 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: trailing-whitespace
- id: check-json
- id: check-yaml
- repo: https://gitlab.com/pycqa/flake8
rev: 4.0.1
hooks:
- id: flake8
+ additional_dependencies: [flake8-bugbear==22.3.23]
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
name: Check source code spelling
stages: [commit]
- - id: codespell
- name: Check commit message spelling
- stages: [commit-msg]
- repo: local
hooks:
- id: mypy
name: mypy
entry: mypy
args: [swh]
pass_filenames: false
language: system
types: [python]
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/python/black
- rev: 19.10b0
+ rev: 22.3.0
hooks:
- id: black
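The jump from black 19.10b0 to 22.3.0 accounts for the mechanical reformatting spread through the rest of this diff, and is presumably the commit recorded in `.git-blame-ignore-revs` above; the new flake8-bugbear dependency pairs with the `select = C,E,F,W,B950` line added to `setup.cfg` below.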
diff --git a/PKG-INFO b/PKG-INFO
index befa3cd..753dded 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,29 +1,29 @@
Metadata-Version: 2.1
Name: swh.icinga_plugins
-Version: 0.4.2
+Version: 0.5.0
Summary: Icinga plugins for Software Heritage infrastructure monitoring
Home-page: https://forge.softwareheritage.org/diffusion/swh-icinga-plugins
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-icinga-plugins
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 3 - Alpha
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
License-File: LICENSE
License-File: AUTHORS
swh-icinga-plugins
==================
Scripts for end-to-end monitoring of the SWH infrastructure
diff --git a/mypy.ini b/mypy.ini
index 2b77ba2..f8f07e3 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,18 +1,22 @@
[mypy]
namespace_packages = True
warn_unused_ignores = True
# 3rd party libraries without stubs (yet)
[mypy-pkg_resources.*]
ignore_missing_imports = True
[mypy-pytest.*]
ignore_missing_imports = True
[mypy-requests_mock.*]
ignore_missing_imports = True
+[mypy-prometheus_client.*]
+ignore_missing_imports = True
+
+
# [mypy-add_your_lib_here.*]
# ignore_missing_imports = True
diff --git a/pytest.ini b/pytest.ini
index 9454e79..114a44a 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,3 +1,5 @@
[pytest]
addopts = -p no:django
-norecursedirs = docs .*
+norecursedirs = build docs .*
+
+asyncio_mode = strict
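`asyncio_mode = strict` pins pytest-asyncio's behavior: only tests explicitly decorated with `@pytest.mark.asyncio` are run as async, and the deprecation warning pytest-asyncio emits when no mode is configured goes away, presumably the motivation here given this package has no async tests.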
diff --git a/requirements-test.txt b/requirements-test.txt
index 888161b..e470c85 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,7 +1,7 @@
-pytest < 7.0.0 # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use
+pytest
pytest-mock
requests-mock
types-click
types-requests
types-python-dateutil
types-PyYAML
diff --git a/requirements.txt b/requirements.txt
index ced35f8..516e676 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
click
psycopg2
requests
+prometheus-client
+typing
diff --git a/setup.cfg b/setup.cfg
index 1d722c2..f65ba0a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,8 +1,9 @@
[flake8]
-ignore = E203,E231,W503
+select = C,E,F,W,B950
+ignore = E203,E231,E501,W503
max-line-length = 88
[egg_info]
tag_build =
tag_date = 0
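`B950` is flake8-bugbear's replacement for the line-length check: unlike `E501`, which is now ignored, it only fires once a line exceeds `max-line-length` by more than 10%, which tolerates the occasional overlong line black leaves behind. Since bugbear's B9xx checks are off by default, they must be opted into via `select`.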
diff --git a/swh.icinga_plugins.egg-info/PKG-INFO b/swh.icinga_plugins.egg-info/PKG-INFO
index d49054b..0320d1f 100644
--- a/swh.icinga_plugins.egg-info/PKG-INFO
+++ b/swh.icinga_plugins.egg-info/PKG-INFO
@@ -1,29 +1,29 @@
Metadata-Version: 2.1
Name: swh.icinga-plugins
-Version: 0.4.2
+Version: 0.5.0
Summary: Icinga plugins for Software Heritage infrastructure monitoring
Home-page: https://forge.softwareheritage.org/diffusion/swh-icinga-plugins
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-icinga-plugins
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 3 - Alpha
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
License-File: LICENSE
License-File: AUTHORS
swh-icinga-plugins
==================
Scripts for end-to-end monitoring of the SWH infrastructure
diff --git a/swh.icinga_plugins.egg-info/SOURCES.txt b/swh.icinga_plugins.egg-info/SOURCES.txt
index e7829f9..97debe4 100644
--- a/swh.icinga_plugins.egg-info/SOURCES.txt
+++ b/swh.icinga_plugins.egg-info/SOURCES.txt
@@ -1,47 +1,49 @@
+.git-blame-ignore-revs
.gitignore
.pre-commit-config.yaml
AUTHORS
CODE_OF_CONDUCT.md
CONTRIBUTORS
LICENSE
MANIFEST.in
Makefile
README.md
mypy.ini
pyproject.toml
pytest.ini
requirements-swh.txt
requirements-test.txt
requirements.txt
setup.cfg
setup.py
tox.ini
data/deposit/jesuisgpl.tgz
data/deposit/jesuisgpl.tgz.xml
docs/.gitignore
docs/Makefile
docs/conf.py
docs/index.rst
docs/_static/.placeholder
docs/_templates/.placeholder
swh/__init__.py
swh.icinga_plugins.egg-info/PKG-INFO
swh.icinga_plugins.egg-info/SOURCES.txt
swh.icinga_plugins.egg-info/dependency_links.txt
swh.icinga_plugins.egg-info/entry_points.txt
swh.icinga_plugins.egg-info/requires.txt
swh.icinga_plugins.egg-info/top_level.txt
swh/icinga_plugins/__init__.py
swh/icinga_plugins/base_check.py
swh/icinga_plugins/cli.py
swh/icinga_plugins/deposit.py
swh/icinga_plugins/py.typed
swh/icinga_plugins/save_code_now.py
swh/icinga_plugins/vault.py
swh/icinga_plugins/tests/__init__.py
swh/icinga_plugins/tests/conftest.py
+swh/icinga_plugins/tests/test_base_check.py
swh/icinga_plugins/tests/test_deposit.py
swh/icinga_plugins/tests/test_save_code_now.py
swh/icinga_plugins/tests/test_vault.py
swh/icinga_plugins/tests/utils.py
swh/icinga_plugins/tests/web_scenario.py
\ No newline at end of file
diff --git a/swh.icinga_plugins.egg-info/requires.txt b/swh.icinga_plugins.egg-info/requires.txt
index eb46405..0f59140 100644
--- a/swh.icinga_plugins.egg-info/requires.txt
+++ b/swh.icinga_plugins.egg-info/requires.txt
@@ -1,15 +1,17 @@
click
psycopg2
requests
+prometheus-client
+typing
swh.core[http]>=0.3
swh.deposit>=0.3
swh.storage>=0.0.162
[testing]
-pytest<7.0.0
+pytest
pytest-mock
requests-mock
types-click
types-requests
types-python-dateutil
types-PyYAML
diff --git a/swh/icinga_plugins/base_check.py b/swh/icinga_plugins/base_check.py
index 7110a9e..08be439 100644
--- a/swh/icinga_plugins/base_check.py
+++ b/swh/icinga_plugins/base_check.py
@@ -1,32 +1,119 @@
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import atexit
+from typing import Any, Dict, List
-from typing import Dict
+from prometheus_client import CollectorRegistry, Gauge, Summary, write_to_textfile
class BaseCheck:
DEFAULT_WARNING_THRESHOLD = 60
DEFAULT_CRITICAL_THRESHOLD = 120
+ PROMETHEUS_METRICS_BASENAME = "swh_e2e_"
- def __init__(self, obj: Dict[str, str]) -> None:
+ def __init__(self, obj: Dict[str, str], application: str):
self.warning_threshold = float(
obj.get("warning_threshold", self.DEFAULT_WARNING_THRESHOLD)
)
self.critical_threshold = float(
obj.get("critical_threshold", self.DEFAULT_CRITICAL_THRESHOLD)
)
+ self.prometheus_enabled = obj.get("prometheus_enabled")
+ self.prometheus_exporter_directory = obj.get("prometheus_exporter_directory")
+ self.environment = obj.get("environment")
+ self.application = application
+
+ # A new registry is created to not export the default process metrics
+ self.registry = CollectorRegistry()
+
+ self.prometheus_metrics: Dict[str, Any] = {}
+
+ atexit.register(self.save_prometheus_metrics)
def get_status(self, value):
if self.critical_threshold and value >= self.critical_threshold:
return (2, "CRITICAL")
elif self.warning_threshold and value >= self.warning_threshold:
return (1, "WARNING")
else:
return (0, "OK")
def print_result(self, status_type, status_string, **metrics):
print(f"{self.TYPE} {status_type} - {status_string}")
for (metric_name, metric_value) in sorted(metrics.items()):
print(f"| '{metric_name}' = {metric_value:.2f}s")
+
+ def collect_prometheus_metric(
+ self, name: str, value: float, labels: List[str] = []
+ ):
+ g = self.prometheus_metrics.get(self.PROMETHEUS_METRICS_BASENAME + name)
+
+ if g is None:
+ raise ValueError(f"No metric {name} found")
+
+ g.labels(*self._get_label_values(labels)).set(value)
+
+ def _get_label_values(self, labels: List[str]) -> List[str]:
+ label_list = []
+
+ if self.environment:
+ label_list.append(self.environment)
+
+ if self.application is None:
+ raise ValueError("Application name must be specified")
+ label_list.append(self.application)
+
+ return label_list + labels
+
+ def _get_label_names(self, values: List[str] = []) -> List[str]:
+ full_list = []
+
+ if self.environment:
+ full_list.append("environment")
+ full_list.append("application")
+
+ full_list += values
+
+ return full_list
+
+ def register_prometheus_summary(
+ self, name: str, unit: str, labels: List[str] = []
+ ) -> None:
+ full_name = self.PROMETHEUS_METRICS_BASENAME + name
+
+ self.prometheus_metrics[full_name] = Summary(
+ full_name,
+ "",
+ registry=self.registry,
+ unit=unit,
+ labelnames=self._get_label_names(labels),
+ )
+
+ def register_prometheus_gauge(
+ self, name: str, unit: str, labels: List[str] = []
+ ) -> None:
+ full_name = self.PROMETHEUS_METRICS_BASENAME + name
+
+ self.prometheus_metrics[full_name] = Gauge(
+ name=full_name,
+ documentation="",
+ registry=self.registry,
+ unit=unit,
+ labelnames=self._get_label_names(labels),
+ )
+
+ def save_prometheus_metrics(self) -> None:
+ """Dump on disk the .prom file containing the
+ metrics collected during the check execution.
+
+ It's a callback method triggered by the atexit
+ declared in the constructor."""
+ if self.prometheus_enabled:
+ assert self.prometheus_exporter_directory is not None
+
+ filename = (
+ self.prometheus_exporter_directory + "/" + self.application + ".prom"
+ )
+ write_to_textfile(filename, self.registry)
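Taken together, the new helpers give each checker a three-step flow: register metrics up front, collect labeled samples as the scenario runs, and let the atexit hook dump the textfile. A minimal sketch of that flow, using a hypothetical `DemoCheck` subclass that is not part of this diff:

```python
from swh.icinga_plugins.base_check import BaseCheck


class DemoCheck(BaseCheck):
    # Hypothetical example check, not part of swh-icinga-plugins.
    TYPE = "DEMO"

    def __init__(self, obj):
        super().__init__(obj, application="demo")
        # Registers swh_e2e_duration_seconds (prometheus_client appends the
        # unit); label names are environment (if set), application, status.
        self.register_prometheus_gauge("duration", "seconds", ["status"])

    def main(self):
        # Records a sample for labels ("staging", "demo", "succeeded").
        self.collect_prometheus_metric("duration", 1.5, ["succeeded"])
        return 0


check = DemoCheck(
    {
        "prometheus_enabled": True,
        "prometheus_exporter_directory": "/tmp",
        "environment": "staging",
    }
)
check.main()
# Normally triggered by the atexit hook; called directly here so the write
# happens at a deterministic point. Produces /tmp/demo.prom with a line like:
#   swh_e2e_duration_seconds{environment="staging",application="demo",status="succeeded"} 1.5
check.save_prometheus_metrics()
```

The dedicated `CollectorRegistry` matters here: registering against the default registry would make `write_to_textfile` also dump the process and platform collectors into every `.prom` file.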
diff --git a/swh/icinga_plugins/cli.py b/swh/icinga_plugins/cli.py
index b9412ae..9ee8bd0 100644
--- a/swh/icinga_plugins/cli.py
+++ b/swh/icinga_plugins/cli.py
@@ -1,147 +1,164 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
import sys
import click
from swh.core.cli import CONTEXT_SETTINGS
from swh.core.cli import swh as swh_cli_group
@swh_cli_group.group(name="icinga_plugins", context_settings=CONTEXT_SETTINGS)
@click.option("-w", "--warning", type=int, help="Warning threshold.")
@click.option("-c", "--critical", type=int, help="Critical threshold.")
+@click.option("--prometheus-exporter/--no-prometheus-exporter", default=False)
+@click.option(
+ "--prometheus-exporter-directory",
+ type=str,
+ default="/var/lib/prometheus/node-exporter",
+)
+@click.option("--environment", type=str, help="The tested environment")
@click.pass_context
-def icinga_cli_group(ctx, warning, critical):
- """Main command for Icinga plugins
- """
+def icinga_cli_group(
+ ctx,
+ warning,
+ critical,
+ prometheus_exporter: bool,
+ prometheus_exporter_directory: str,
+ environment: str,
+):
+ """Main command for Icinga plugins"""
ctx.ensure_object(dict)
if warning:
ctx.obj["warning_threshold"] = int(warning)
if critical:
ctx.obj["critical_threshold"] = int(critical)
+ ctx.obj["prometheus_enabled"] = prometheus_exporter
+ ctx.obj["prometheus_exporter_directory"] = prometheus_exporter_directory
+ ctx.obj["environment"] = environment
+
@icinga_cli_group.group(name="check-vault")
@click.option(
"--swh-storage-url", type=str, required=True, help="URL to an swh-storage HTTP API"
)
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for cooking status.",
)
@click.pass_context
def check_vault(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_vault.command(name="directory")
@click.pass_context
def check_vault_directory(ctx):
"""Picks a random directory, requests its cooking via swh-web,
and waits for completion."""
from .vault import VaultCheck
sys.exit(VaultCheck(ctx.obj).main())
@icinga_cli_group.group(name="check-savecodenow")
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for save code now status.",
)
@click.pass_context
def check_scn(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_scn.command(name="origin")
@click.argument("origin", type=str)
@click.option("--visit-type", type=str, required=True, help="Visit type for origin")
@click.pass_context
def check_scn_origin(ctx, origin, visit_type):
"""Requests a save code now via the api for a given origin with type visit_type, waits
for its completion, report approximate time of completion (failed or succeeded) and
warn if threshold exceeded.
"""
from .save_code_now import SaveCodeNowCheck
sys.exit(SaveCodeNowCheck(ctx.obj, origin, visit_type).main())
@icinga_cli_group.group(name="check-deposit")
@click.option(
"--server",
type=str,
default="https://deposit.softwareheritage.org/1",
help="URL to the SWORD server to test",
)
@click.option(
"--provider-url",
type=str,
required=True,
help=(
"Root URL of the deposit client, as defined in the "
"'deposit_client.provider_url' column in the deposit DB"
),
)
@click.option("--username", type=str, required=True, help="Login for the SWORD server")
@click.option(
"--password", type=str, required=True, help="Password for the SWORD server"
)
@click.option(
"--collection",
type=str,
required=True,
help="Software collection to use on the SWORD server",
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for ingestion status.",
)
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.pass_context
def check_deposit(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_deposit.command(name="single")
@click.option(
"--archive", type=click.Path(), required=True, help="Software artefact to upload"
)
@click.option(
"--metadata",
type=click.Path(),
required=True,
help="Metadata file for the software artefact.",
)
@click.pass_context
def check_deposit_single(ctx, **kwargs):
"""Checks the provided archive and metadata file and be deposited."""
from .deposit import DepositCheck
ctx.obj.update(kwargs)
sys.exit(DepositCheck(ctx.obj).main())
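With the new group-level options, any checker can opt into the textfile export, e.g. `swh icinga_plugins --prometheus-exporter --prometheus-exporter-directory /var/lib/prometheus/node-exporter --environment production check-savecodenow --swh-web-url https://archive.softwareheritage.org origin <origin-url> --visit-type git` (the URL and origin are illustrative). Note that the flags belong to the `icinga_plugins` group and must appear before the subcommand.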
diff --git a/swh/icinga_plugins/deposit.py b/swh/icinga_plugins/deposit.py
index 8f94c0f..863c4a9 100644
--- a/swh/icinga_plugins/deposit.py
+++ b/swh/icinga_plugins/deposit.py
@@ -1,264 +1,307 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import sys
import time
from typing import Any, Dict, Optional
import requests
from swh.deposit.client import PublicApiDepositClient
from .base_check import BaseCheck
class DepositCheck(BaseCheck):
TYPE = "DEPOSIT"
DEFAULT_WARNING_THRESHOLD = 120
DEFAULT_CRITICAL_THRESHOLD = 3600
def __init__(self, obj):
- super().__init__(obj)
+ super().__init__(obj, application="deposit")
self.api_url = obj["swh_web_url"].rstrip("/")
self._poll_interval = obj["poll_interval"]
self._archive_path = obj["archive"]
self._metadata_path = obj["metadata"]
self._collection = obj["collection"]
self._slug: Optional[str] = None
self._provider_url = obj["provider_url"]
self._client = PublicApiDepositClient(
{
"url": obj["server"],
"auth": {"username": obj["username"], "password": obj["password"]},
}
)
+ self.register_prometheus_gauge("duration", "seconds", ["step", "status"])
+ self.register_prometheus_gauge("status", "")
+
def upload_deposit(self):
slug = (
"check-deposit-%s"
% datetime.datetime.fromtimestamp(time.time()).isoformat()
)
result = self._client.deposit_create(
archive=self._archive_path,
metadata=self._metadata_path,
collection=self._collection,
in_progress=False,
slug=slug,
)
self._slug = slug
self._deposit_id = result["deposit_id"]
return result
def update_deposit_with_metadata(self) -> Dict[str, Any]:
- """Trigger a metadata update on the deposit once it's completed.
-
- """
+ """Trigger a metadata update on the deposit once it's completed."""
deposit = self.get_deposit_status()
swhid = deposit["deposit_swh_id"]
assert deposit["deposit_id"] == self._deposit_id
# We can reuse the initial metadata file we already sent
return self._client.deposit_update(
self._collection,
self._deposit_id,
self._slug,
metadata=self._metadata_path,
swhid=swhid,
)
def get_deposit_status(self):
return self._client.deposit_status(
collection=self._collection, deposit_id=self._deposit_id
)
def wait_while_status(self, statuses, start_time, metrics, result):
while result["deposit_status"] in statuses:
metrics["total_time"] = time.time() - start_time
if metrics["total_time"] > self.critical_threshold:
self.print_result(
"CRITICAL",
f"Timed out while in status "
f'{result["deposit_status"]} '
f'({metrics["total_time"]}s seconds since deposit '
f"started)",
**metrics,
)
+
+ self.collect_prometheus_metric(
+ "duration",
+ metrics["total_time"],
+ [result["deposit_status"], "timeout"],
+ )
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["", "timeout"]
+ )
+ self.collect_prometheus_metric("status", 2)
+
sys.exit(2)
time.sleep(self._poll_interval)
result = self.get_deposit_status()
return result
def main(self):
start_time = time.time()
start_datetime = datetime.datetime.fromtimestamp(
start_time, tz=datetime.timezone.utc
)
metrics = {}
# Upload the archive and metadata
result = self.upload_deposit()
metrics["upload_time"] = time.time() - start_time
# Wait for validation
result = self.wait_while_status(["deposited"], start_time, metrics, result)
metrics["total_time"] = time.time() - start_time
metrics["validation_time"] = metrics["total_time"] - metrics["upload_time"]
# Check validation succeeded
if result["deposit_status"] == "rejected":
self.print_result(
"CRITICAL",
f'Deposit was rejected: {result["deposit_status_detail"]}',
**metrics,
)
+ self.collect_prometheus_metric(
+ "duration", metrics["validation_time"], ["validation", "rejected"]
+ )
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["validation", "rejected"]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
+ self.collect_prometheus_metric(
+ "duration", metrics["validation_time"], ["validation", "ok"]
+ )
# Wait for loading
result = self.wait_while_status(
["verified", "loading"], start_time, metrics, result
)
metrics["total_time"] = time.time() - start_time
metrics["load_time"] = (
metrics["total_time"] - metrics["upload_time"] - metrics["validation_time"]
)
+ self.collect_prometheus_metric(
+ "duration", metrics["load_time"], ["loading", result["deposit_status"]]
+ )
# Check loading succeeded
if result["deposit_status"] == "failed":
self.print_result(
"CRITICAL",
f'Deposit loading failed: {result["deposit_status_detail"]}',
**metrics,
)
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["total", "failed"]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
# Check for unexpected status
if result["deposit_status"] != "done":
self.print_result(
"CRITICAL",
f'Deposit got unexpected status: {result["deposit_status"]} '
f'({result["deposit_status_detail"]})',
**metrics,
)
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["total", result["deposit_status"]]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
# Get the SWHID
if "deposit_swh_id" not in result:
# if the deposit succeeded immediately (which is rare), it does not
# contain the SWHID, so we need to re-fetch its status.
result = self.get_deposit_status()
if result.get("deposit_swh_id") is None:
self.print_result(
"CRITICAL",
f"'deposit_swh_id' missing from result: {result!r}",
**metrics,
)
return 2
swhid = result["deposit_swh_id"]
# Check for unexpected status
if result["deposit_status"] != "done":
self.print_result(
"CRITICAL",
f'Deposit status went from "done" to: {result["deposit_status"]} '
f'({result["deposit_status_detail"]})',
**metrics,
)
return 2
# Get metadata list from swh-web
response = requests.get(
f"{self.api_url}/api/1/raw-extrinsic-metadata/swhid/{swhid}/",
params={
"authority": f"deposit_client {self._provider_url}",
"after": start_datetime.isoformat(),
},
)
if response.status_code != 200:
self.print_result(
"CRITICAL",
f"Getting the list of metadata returned code {response.status_code}: "
f"{response.content!r}",
**metrics,
)
return 2
metadata_objects = response.json()
expected_origin = f"{self._provider_url}/{self._slug}"
# Filter out objects that were clearly not created by this deposit
relevant_metadata_objects = [
d for d in metadata_objects if d.get("origin") == expected_origin
]
if not relevant_metadata_objects:
self.print_result(
"CRITICAL",
f"No recent metadata on {swhid} with origin {expected_origin} in: "
f"{metadata_objects!r}",
**metrics,
)
return 2
# Check the metadata was loaded as-is
metadata_url = relevant_metadata_objects[0]["metadata_url"]
metadata_file = requests.get(metadata_url).content
with open(self._metadata_path, "rb") as fd:
expected_metadata_file = fd.read()
if metadata_file != expected_metadata_file:
self.print_result(
"CRITICAL",
f"Metadata on {swhid} with origin {expected_origin} "
f"(at {metadata_url}) differs from uploaded Atom document "
f"(at {self._metadata_path})",
**metrics,
)
return 2
# Everything went fine, check total time wasn't too large and
# print result
(status_code, status) = self.get_status(metrics["total_time"])
self.print_result(
status,
f'Deposit took {metrics["total_time"]:.2f}s and succeeded.',
**metrics,
)
if status_code != 0: # Stop if any problem in the initial scenario
+ self.collect_prometheus_metric("status", status_code)
return status_code
# Initial deposit is now completed, now we can update the deposit with metadata
result = self.update_deposit_with_metadata()
total_time = time.time() - start_time
metrics_update = {
"total_time": total_time,
"update_time": (
total_time
- metrics["upload_time"]
- metrics["validation_time"]
- metrics["load_time"]
),
}
if "error" in result:
self.print_result(
"CRITICAL",
f'Deposit Metadata update failed: {result["error"]} ',
**metrics_update,
)
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["total", "metadata_error"]
+ )
+ self.collect_prometheus_metric("status", 2)
return 2
(status_code, status) = self.get_status(metrics_update["total_time"])
self.print_result(
status,
f'Deposit Metadata update took {metrics_update["update_time"]:.2f}s '
"and succeeded.",
**metrics_update,
)
+
+ self.collect_prometheus_metric(
+ "duration", metrics["total_time"], ["total", "done"]
+ )
+ self.collect_prometheus_metric("status", status_code)
return status_code
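The net effect on the deposit checker: every terminal path now records a `status` gauge mirroring the Nagios exit code (0 OK, 1 WARNING, 2 CRITICAL) alongside `duration` samples labeled by step (`validation`, `loading`, `total`) and outcome, so Prometheus dashboards see the same verdicts Icinga reports.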
diff --git a/swh/icinga_plugins/save_code_now.py b/swh/icinga_plugins/save_code_now.py
index 131c080..1922225 100644
--- a/swh/icinga_plugins/save_code_now.py
+++ b/swh/icinga_plugins/save_code_now.py
@@ -1,113 +1,124 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import time
from typing import Dict, List
import requests
from .base_check import BaseCheck
REPORT_MSG = "Save code now request for origin"
WAITING_STATUSES = ("not yet scheduled", "running", "scheduled")
class SaveCodeNowCheck(BaseCheck):
TYPE = "SAVECODENOW"
DEFAULT_WARNING_THRESHOLD = 60
DEFAULT_CRITICAL_THRESHOLD = 120
def __init__(self, obj: Dict, origin: str, visit_type: str) -> None:
- super().__init__(obj)
+ super().__init__(obj, application="scn")
self.api_url = obj["swh_web_url"].rstrip("/")
self.poll_interval = obj["poll_interval"]
self.origin = origin
self.visit_type = visit_type
+ self.register_prometheus_gauge("duration", "seconds", ["status"])
+ self.register_prometheus_gauge("status", "")
+
@staticmethod
def api_url_scn(root_api_url: str, origin: str, visit_type: str) -> str:
"""Compute the save code now api url for a given origin"""
return f"{root_api_url}/api/1/origin/save/{visit_type}/url/{origin}/"
def main(self) -> int:
"""Scenario description:
1. Requests a save code now request via the api for origin self.origin with type
self.visit_type.
2. Polling regularly at self.poll_interval seconds the completion status.
3. When either succeeded, failed or threshold exceeded, report approximate time
of completion. This will warn if thresholds are exceeded.
"""
start_time: float = time.time()
total_time: float = 0.0
scn_url = self.api_url_scn(self.api_url, self.origin, self.visit_type)
response = requests.post(scn_url)
assert response.status_code == 200, (response, response.text)
result: Dict = response.json()
status_key = "save_task_status"
request_date = result["save_request_date"]
origin_info = (self.visit_type, self.origin)
while result[status_key] in WAITING_STATUSES:
time.sleep(self.poll_interval)
response = requests.get(scn_url)
assert (
response.status_code == 200
), f"Unexpected response: {response}, {response.text}"
raw_result: List[Dict] = response.json()
assert len(raw_result) > 0, f"Unexpected result: {raw_result}"
if len(raw_result) > 1:
# retrieve only the one status result we are interested in
result = next(
filter(lambda r: r["save_request_date"] == request_date, raw_result)
)
else:
result = raw_result[0]
# this is because the API can return multiple entries for the same origin
assert result["save_request_date"] == request_date
total_time = time.time() - start_time
if total_time > self.critical_threshold:
self.print_result(
"CRITICAL",
f"{REPORT_MSG} {origin_info} took more than {total_time:.2f}s "
f'and has status: {result["save_task_status"]}.',
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["timeout"])
+ self.collect_prometheus_metric("status", 2)
return 2
if result[status_key] == "succeeded":
(status_code, status) = self.get_status(total_time)
self.print_result(
status,
f"{REPORT_MSG} {origin_info} took {total_time:.2f}s and succeeded.",
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["succeeded"])
+ self.collect_prometheus_metric("status", status_code)
return status_code
elif result[status_key] == "failed":
self.print_result(
"CRITICAL",
f"{REPORT_MSG} {origin_info} took {total_time:.2f}s and failed.",
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["failed"])
+ self.collect_prometheus_metric("status", 2)
return 2
else:
self.print_result(
"CRITICAL",
f"{REPORT_MSG} {origin_info} took {total_time:.2f}s "
"and resulted in unsupported status: "
f"{result['save_request_status']} ; {result[status_key]}.",
total_time=total_time,
)
+ self.collect_prometheus_metric("duration", total_time, ["failed"])
+ self.collect_prometheus_metric("status", 2)
return 2
diff --git a/swh/icinga_plugins/tests/test_base_check.py b/swh/icinga_plugins/tests/test_base_check.py
new file mode 100644
index 0000000..fe25dd3
--- /dev/null
+++ b/swh/icinga_plugins/tests/test_base_check.py
@@ -0,0 +1,57 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.icinga_plugins.base_check import BaseCheck
+
+
+def test_inexistent_metric():
+ base_check = BaseCheck({}, "test")
+
+ with pytest.raises(ValueError, match="No metric unknown found"):
+ base_check.collect_prometheus_metric("unknown", 10, [])
+
+
+def test_environment():
+ base_check = BaseCheck({"environment": "pytest"}, "test")
+
+ with pytest.raises(ValueError, match="No metric unknown found"):
+ base_check.collect_prometheus_metric("unknown", 10, [])
+
+
+def test_application_not_defined():
+ base_check = BaseCheck({"environment": "pytest"}, "test")
+ base_check.register_prometheus_gauge("gauge", "seconds")
+ base_check.application = None
+
+ with pytest.raises(ValueError, match="Application name must be specified"):
+ base_check.collect_prometheus_metric("gauge", 10, [])
+
+
+def test_save_without_directory(tmpdir):
+ config = {
+ "prometheus_enabled": True,
+ }
+
+ base_check = BaseCheck(config, "test")
+
+ with pytest.raises(AssertionError):
+ base_check.save_prometheus_metrics()
+
+
+def test_save(tmpdir):
+ application = "my_application"
+ config = {
+ "prometheus_enabled": True,
+ "prometheus_exporter_directory": tmpdir.strpath,
+ }
+
+ base_check = BaseCheck(config, application)
+ base_check.register_prometheus_gauge("gauge", "count")
+ base_check.collect_prometheus_metric("gauge", 10)
+ base_check.save_prometheus_metrics()
+
+ assert f"{tmpdir.strpath}/{application}.prom" in tmpdir.listdir()
diff --git a/swh/icinga_plugins/tests/test_deposit.py b/swh/icinga_plugins/tests/test_deposit.py
index d0acc62..18c472f 100644
--- a/swh/icinga_plugins/tests/test_deposit.py
+++ b/swh/icinga_plugins/tests/test_deposit.py
@@ -1,925 +1,1018 @@
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import io
import os
import tarfile
import time
from typing import Optional
import pytest
from swh.icinga_plugins.tests.utils import invoke
from .web_scenario import WebScenario
POLL_INTERVAL = 10
BASE_URL = "http://swh-deposit.example.org/1"
BASE_WEB_URL = "http+mock://swh-web.example.org"
PROVIDER_URL = "http://icinga-checker.example.org"
COMMON_OPTIONS = [
"--server",
BASE_URL,
"--username",
"test",
"--password",
"test",
"--collection",
"testcol",
"--swh-web-url",
BASE_WEB_URL,
"--provider-url",
PROVIDER_URL,
]
SAMPLE_METADATA = """
Test Software
swh
test-software
No One
"""
ENTRY_TEMPLATE = """
42
2019-12-19 18:11:00
foo.tar.gz
{status}
http://purl.org/net/sword/package/SimpleZip
"""
STATUS_TEMPLATE = """
42
{status}
{status_detail}%s
"""
def compute_origin():
# This is the same origin the checker would compute, because we mock time.time
# to be constant until time.sleep is called
return (
PROVIDER_URL
+ "/check-deposit-%s" % datetime.datetime.fromtimestamp(time.time()).isoformat()
)
def status_template(
status: str, status_detail: str = "", swhid: Optional[str] = None
) -> str:
- """Generate a proper status template out of status, status_detail and optional swhid
-
- """
+ """Generate a proper status template out of status, status_detail and optional swhid"""
if swhid is not None:
template = (
STATUS_TEMPLATE % f"\n {swhid}"
)
return template.format(status=status, status_detail=status_detail, swhid=swhid)
template = STATUS_TEMPLATE % ""
return template.format(status=status, status_detail=status_detail)
def test_status_template():
actual_status = status_template(status="deposited")
assert (
actual_status
== """
42
deposited
"""
)
actual_status = status_template(status="verified", status_detail="detail")
assert (
actual_status
== """
42
verified
detail
"""
)
actual_status = status_template(
status="done", swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
)
assert (
actual_status
== """
42
done
swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74
"""
)
@pytest.fixture(scope="session")
def tmp_path(tmp_path_factory):
return tmp_path_factory.mktemp(__name__)
@pytest.fixture(scope="session")
def sample_metadata(tmp_path):
- """Returns a sample metadata file's path
-
- """
+ """Returns a sample metadata file's path"""
path = os.path.join(tmp_path, "metadata.xml")
with open(path, "w") as fd:
fd.write(SAMPLE_METADATA)
return path
@pytest.fixture(scope="session")
def sample_archive(tmp_path):
- """Returns a sample archive's path
-
- """
+ """Returns a sample archive's path"""
path = os.path.join(tmp_path, "archive.tar.gz")
with tarfile.open(path, "w:gz") as tf:
tf.addfile(tarfile.TarInfo("hello.py"), io.BytesIO(b'print("Hello world")'))
return path
def test_deposit_immediate_success(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
- """Both deposit creation and deposit metadata update passed without delays
-
- """
+ """Both deposit creation and deposit metadata update passed without delays"""
origin = compute_origin()
scenario = WebScenario()
status_xml = status_template(
status="done",
status_detail="",
swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
)
# Initial deposit
scenario.add_step(
- "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="done"),
+ "post",
+ f"{BASE_URL}/testcol/",
+ ENTRY_TEMPLATE.format(status="done"),
)
# Checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# Then the checker checks the metadata appeared on the website
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
[
{
"swhid": swhid,
"origin": origin,
"discovery_date": "2999-03-03T10:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
}
],
)
scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
# Then metadata update
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# the internal deposit client calls the status API, then updates the metadata, then calls the status API again
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_xml,
)
scenario.add_step(
- "put", f"{BASE_URL}/testcol/42/atom/", status_xml,
+ "put",
+ f"{BASE_URL}/testcol/42/atom/",
+ status_xml,
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_xml,
)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
]
)
assert result.output == (
"DEPOSIT OK - Deposit took 0.00s and succeeded.\n"
"| 'load_time' = 0.00s\n"
"| 'total_time' = 0.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 0.00s\n"
"DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n"
"| 'total_time' = 0.00s\n"
"| 'update_time' = 0.00s\n"
)
assert result.exit_code == 0, f"Unexpected output: {result.output}"
def test_deposit_delays(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
"""Deposit creation passed with some delays, deposit metadata update passed without
delay
"""
origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="loading"),
)
# Deposit done, checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# Then the checker checks the metadata appeared on the website
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
[
{
"swhid": swhid,
"origin": origin,
"discovery_date": "2999-03-03T10:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
}
],
)
scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
# Then metadata update
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# the internal deposit client calls the status API, then updates the metadata, then calls the status API again
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_xml,
)
scenario.add_step(
- "put", f"{BASE_URL}/testcol/42/atom/", status_xml,
+ "put",
+ f"{BASE_URL}/testcol/42/atom/",
+ status_xml,
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_xml,
)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
]
)
assert result.output == (
"DEPOSIT OK - Deposit took 30.00s and succeeded.\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
"DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n"
"| 'total_time' = 30.00s\n"
"| 'update_time' = 0.00s\n"
)
assert result.exit_code == 0, f"Unexpected output: {result.output}"
def test_deposit_then_metadata_update_failed(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
- """Deposit creation passed, deposit metadata update failed
-
- """
+ """Deposit creation passed, deposit metadata update failed"""
origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="loading"),
)
# Deposit done, checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# Then the checker checks the metadata appeared on the website
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
[
{
"swhid": swhid,
"origin": origin,
"discovery_date": "2999-03-03T10:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
}
],
)
scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
# Then metadata update calls
failed_status_xml = status_template(
status="failed", # lying here
status_detail="Failure to ingest",
swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
)
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml)
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT OK - Deposit took 30.00s and succeeded.\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
"DEPOSIT CRITICAL - Deposit Metadata update failed: You can only update "
"metadata on deposit with status 'done' \n"
"| 'total_time' = 30.00s\n"
"| 'update_time' = 0.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_delay_warning(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
- """Deposit creation exceeded delays, no deposit update occurred.
-
- """
+ """Deposit creation exceeded delays, no deposit update occurred."""
origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
# Deposit done, checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# Then the checker checks the metadata appeared on the website
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
[
{
"swhid": swhid,
"origin": origin,
"discovery_date": "2999-03-03T10:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
}
],
)
scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"--warning",
"15",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT WARNING - Deposit took 20.00s and succeeded.\n"
"| 'load_time' = 10.00s\n"
"| 'total_time' = 20.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 1, f"Unexpected output: {result.output}"
def test_deposit_delay_critical(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
# Deposit done, checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_xml,
callback=lambda: time.sleep(60),
)
# Then the checker checks the metadata appeared on the website
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
[
{
"swhid": swhid,
"origin": origin,
"discovery_date": "2999-03-03T10:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
}
],
)
scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
scenario.install_mock(requests_mock)
result = invoke(
[
"--critical",
"50",
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit took 80.00s and succeeded.\n"
"| 'load_time' = 70.00s\n"
"| 'total_time' = 80.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_timeout(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post",
f"{BASE_URL}/testcol/",
ENTRY_TEMPLATE.format(status="deposited"),
callback=lambda: time.sleep(1500),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="verified"),
callback=lambda: time.sleep(1500),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="loading"),
callback=lambda: time.sleep(1500),
)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Timed out while in status loading "
"(4520.0s seconds since deposit started)\n"
"| 'total_time' = 4520.00s\n"
"| 'upload_time' = 1500.00s\n"
"| 'validation_time' = 1510.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_metadata_missing(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
# Deposit done, checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_xml,
)
# Then the checker checks the metadata appeared on the website
metadata_list = [
{
# Filtered out, because wrong origin
"swhid": swhid,
"origin": "http://wrong-origin.example.org",
"discovery_date": "2999-03-03T10:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
},
]
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
metadata_list,
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
f"DEPOSIT CRITICAL - No recent metadata on {swhid} with origin {origin} in: "
f"{metadata_list!r}\n"
"| 'load_time' = 10.00s\n"
"| 'total_time' = 20.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_metadata_error(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
# Deposit done, checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_xml,
)
# Then the checker checks the metadata appeared on the website
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
"foo\nbar",
status_code=400,
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Getting the list of metadata returned code 400: "
"b'foo\\nbar'\n"
"| 'load_time' = 10.00s\n"
"| 'total_time' = 20.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_metadata_corrupt(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
# Deposit done, checker gets the SWHID
swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
- status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ status_xml = status_template(
+ status="done",
+ status_detail="",
+ swhid=swhid,
+ )
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_xml,
)
# Then the checker checks the metadata appeared on the website
metadata_list = [
{
"swhid": swhid,
"origin": origin,
"discovery_date": "2999-03-03T09:48:47+00:00",
"metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
},
]
scenario.add_step(
"get",
f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
f"?authority=deposit_client+http%3A%2F%2Ficinga-checker.example.org"
f"&after=2022-03-04T17%3A02%3A39%2B00%3A00",
metadata_list,
)
scenario.add_step(
"get",
f"{BASE_WEB_URL}/the-metadata-url",
SAMPLE_METADATA[0:-1], # corrupting the metadata by dropping the last byte
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
f"DEPOSIT CRITICAL - Metadata on {swhid} with origin {origin} (at "
f"{BASE_WEB_URL}/the-metadata-url) differs from uploaded Atom document (at "
f"{sample_metadata})\n"
"| 'load_time' = 10.00s\n"
"| 'total_time' = 20.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_rejected(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="rejected", status_detail="booo"),
)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit was rejected: booo\n"
"| 'total_time' = 10.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_failed(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="loading"),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="failed", status_detail="booo"),
)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit loading failed: booo\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_unexpected_status(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="verified"),
)
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
+ "get",
+ f"{BASE_URL}/testcol/42/status/",
+ status_template(status="loading"),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="what", status_detail="booo"),
)
scenario.install_mock(requests_mock)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit got unexpected status: what (booo)\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
diff --git a/swh/icinga_plugins/tests/test_save_code_now.py b/swh/icinga_plugins/tests/test_save_code_now.py
index bd5e95a..03d72d6 100644
--- a/swh/icinga_plugins/tests/test_save_code_now.py
+++ b/swh/icinga_plugins/tests/test_save_code_now.py
@@ -1,258 +1,259 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timezone
import random
from typing import Dict, Optional, Tuple
import pytest
from swh.icinga_plugins.save_code_now import (
REPORT_MSG,
WAITING_STATUSES,
SaveCodeNowCheck,
)
from .utils import invoke
from .web_scenario import WebScenario
def fake_response(
origin: str,
visit_type: str,
sor_status: str = "pending",
task_status: Optional[str] = None,
) -> Dict:
"""Fake a save code now request api response"""
visit_date = None
if task_status in ("failed", "succeeded"):
visit_date = str(datetime.now(tz=timezone.utc))
return {
"visit_type": visit_type,
"origin_url": origin,
"save_request_date": "to-replace",
"save_request_status": sor_status,
"save_task_status": task_status,
"visit_date": visit_date,
}
@pytest.fixture
def origin_info() -> Tuple[str, str]:
- """Build an origin info to request save code now
-
- """
+ """Build an origin info to request save code now"""
origin_name = random.choice(range(10))
return random.choice(["git", "svn", "hg"]), f"mock://fake-origin-url/{origin_name}"
def test_save_code_now_success(requests_mock, mocker, mocked_time, origin_info):
"""Successful ingestion scenario below threshold"""
scenario = WebScenario()
visit_type, origin = origin_info
root_api_url = "mock://swh-web.example.org"
api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
# creation request
scenario.add_step(
"post",
api_url,
fake_response(origin, visit_type, "accepted", "not yet scheduled"),
)
response_scheduled = fake_response(origin, visit_type, "accepted", "scheduled")
# status polling requests
scenario.add_step("get", api_url, [response_scheduled])
    # sometimes we can get multiple responses, so we fake that here
scenario.add_step("get", api_url, [response_scheduled, response_scheduled])
scenario.add_step(
"get", api_url, [fake_response(origin, visit_type, "accepted", "succeeded")]
)
scenario.install_mock(requests_mock)
# fmt: off
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory", "/tmp",
"check-savecodenow", "--swh-web-url", root_api_url,
"origin", origin,
"--visit-type", visit_type,
]
)
# fmt: on
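    # (the fmt: off/on pragmas keep black from reflowing the hand-grouped
    # CLI arguments above)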
assert result.output == (
f"{SaveCodeNowCheck.TYPE} OK - {REPORT_MSG} {origin_info} took "
f"30.00s and succeeded.\n"
f"| 'total_time' = 30.00s\n"
)
assert result.exit_code == 0, f"Unexpected result: {result.output}"
def test_save_code_now_failure(requests_mock, mocker, mocked_time, origin_info):
"""Failed ingestion scenario should be reported"""
scenario = WebScenario()
visit_type, origin = origin_info
root_api_url = "mock://swh-web.example.org"
api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
# creation request
scenario.add_step(
"post",
api_url,
fake_response(origin, visit_type, "accepted", "not yet scheduled"),
)
# status polling requests
scenario.add_step(
"get", api_url, [fake_response(origin, visit_type, "accepted", "scheduled")]
)
scenario.add_step(
"get", api_url, [fake_response(origin, visit_type, "accepted", "failed")]
)
scenario.install_mock(requests_mock)
# fmt: off
result = invoke(
[
"check-savecodenow", "--swh-web-url", root_api_url,
"origin", origin,
"--visit-type", visit_type,
],
catch_exceptions=True,
)
# fmt: on
assert result.output == (
f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
f"20.00s and failed.\n"
f"| 'total_time' = 20.00s\n"
)
assert result.exit_code == 2, f"Unexpected result: {result.output}"
def test_save_code_now_pending_state_unsupported(
requests_mock, mocker, mocked_time, origin_info
):
"""Pending save requests are not supported in the test so they should fail early
Pending requests are requests that need a moderator to accept the repository into
the save code now flow.
Do not actually use such origin to trigger the checks.
"""
scenario = WebScenario()
visit_type, origin = origin_info
root_api_url = "mock://swh-web2.example.org"
api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
# creation request
scenario.add_step(
- "post", api_url, fake_response(origin, visit_type, "pending", "not created"),
+ "post",
+ api_url,
+ fake_response(origin, visit_type, "pending", "not created"),
)
scenario.install_mock(requests_mock)
# fmt: off
result = invoke(
[
"check-savecodenow", "--swh-web-url", root_api_url,
"origin", origin,
"--visit-type", visit_type,
],
catch_exceptions=True,
)
# fmt: on
assert result.output == (
f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
f"0.00s and resulted in unsupported status: pending ; not created.\n"
f"| 'total_time' = 0.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_save_code_now_threshold_exceeded(
requests_mock, mocker, mocked_time, origin_info
):
- """Saving requests exceeding threshold should mention warning in output
-
- """
+ """Saving requests exceeding threshold should mention warning in output"""
scenario = WebScenario()
visit_type, origin = origin_info
root_api_url = "mock://swh-web2.example.org"
api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
# creation request
scenario.add_step(
"post",
api_url,
fake_response(origin, visit_type, "accepted", "not yet scheduled"),
)
    # we'll keep the response in a waiting status; after 13 polls this
    # exceeds the threshold
    for _ in range(13):
waiting_status = random.choice(WAITING_STATUSES)
response_scheduled = fake_response(
origin, visit_type, "accepted", waiting_status
)
scenario.add_step("get", api_url, [response_scheduled])
scenario.install_mock(requests_mock)
# fmt: off
result = invoke(
[
- "check-savecodenow", "--swh-web-url", root_api_url,
+ "check-savecodenow",
+ "--swh-web-url", root_api_url,
"origin", origin,
"--visit-type", visit_type,
],
catch_exceptions=True,
)
# fmt: on
assert result.output == (
f"{SaveCodeNowCheck.TYPE} CRITICAL - {REPORT_MSG} {origin_info} took "
f"more than 130.00s and has status: {waiting_status}.\n"
f"| 'total_time' = 130.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_save_code_now_unexpected_failure(
requests_mock, mocker, mocked_time, origin_info
):
"""Unexpected failure if the webapi refuses to answer for example"""
scenario = WebScenario()
visit_type, origin = origin_info
root_api_url = "mock://swh-web.example.org"
api_url = SaveCodeNowCheck.api_url_scn(root_api_url, origin, visit_type)
# creation request
scenario.add_step(
"post",
api_url,
fake_response(origin, visit_type, "accepted", "not yet scheduled"),
)
# status polling requests
scenario.add_step(
"get", api_url, [fake_response(origin, visit_type, "accepted", "scheduled")]
)
# unexpected issue when communicating with the api
scenario.add_step("get", api_url, {}, status_code=500)
scenario.install_mock(requests_mock)
with pytest.raises(AssertionError):
# fmt: off
invoke(
[
"check-savecodenow", "--swh-web-url", root_api_url,
"origin", origin,
"--visit-type", visit_type,
],
)
# fmt: on
diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py
index e421a90..07beb09 100644
--- a/swh/icinga_plugins/tests/test_vault.py
+++ b/swh/icinga_plugins/tests/test_vault.py
@@ -1,530 +1,551 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import io
import tarfile
import time
from swh.icinga_plugins.tests.utils import invoke
from .web_scenario import WebScenario
DIR_ID = "ab" * 20
url_api = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/"
url_fetch = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/raw/"
def _make_tarfile():
fd = io.BytesIO()
with tarfile.open(fileobj=fd, mode="w:gz") as tf:
tf.addfile(tarfile.TarInfo(f"swh:1:dir:{DIR_ID}/README"), b"this is a readme\n")
tarinfo = tarfile.TarInfo(f"swh:1:dir:{DIR_ID}")
tarinfo.type = tarfile.DIRTYPE
tf.addfile(tarinfo)
return fd.getvalue()
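# A minimal well-formed vault bundle: a gzipped tarball whose members sit
# under the directory's SWHID, which is what the check verifies below.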
TARBALL = _make_tarfile()
response_pending = {
"obj_id": DIR_ID,
"obj_type": "directory",
"progress_message": "foo",
"status": "pending",
}
response_done = {
"fetch_url": url_fetch,
"id": 9,
"obj_id": DIR_ID,
"obj_type": "directory",
"status": "done",
}
response_done_no_fetch = {
"id": 9,
"obj_id": DIR_ID,
"obj_type": "directory",
"status": "done",
}
response_failed = {
"obj_id": DIR_ID,
"obj_type": "directory",
"progress_message": "foobar",
"status": "failed",
}
response_unknown_status = {
"obj_id": DIR_ID,
"obj_type": "directory",
"progress_message": "what",
"status": "boo",
}
class FakeStorage:
def __init__(self, foo, **kwargs):
pass
def directory_get_random(self):
return bytes.fromhex(DIR_ID)
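# The tests patch swh.icinga_plugins.vault.get_storage with this stub so the
# check always picks DIR_ID instead of querying a real storage service.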
def test_vault_immediate_success(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step(
"get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
]
)
assert result.output == (
f"VAULT OK - cooking directory {DIR_ID} took "
f"10.00s and succeeded.\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 0, result.output
def test_vault_delayed_success(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step(
"get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
]
)
assert result.output == (
f"VAULT OK - cooking directory {DIR_ID} took "
f"20.00s and succeeded.\n"
f"| 'total_time' = 20.00s\n"
)
assert result.exit_code == 0, result.output
def test_vault_failure(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_failed)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
f"VAULT CRITICAL - cooking directory {DIR_ID} took "
f"10.00s and failed with: foobar\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_unknown_status(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_unknown_status)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
f"VAULT CRITICAL - cooking directory {DIR_ID} took "
f"10.00s and resulted in unknown status: boo\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_timeout(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_pending)
scenario.add_step(
"get", url_api, response_pending, callback=lambda: time.sleep(4000)
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
f"VAULT CRITICAL - cooking directory {DIR_ID} took more than "
f"4020.00s and has status: foo\n"
f"| 'total_time' = 4020.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_cached_directory(requests_mock, mocker, mocked_time):
"""First serves a directory that's already in the cache, to
test that vault_check requests another one."""
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=200)
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step(
"get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
]
)
assert result.output == (
f"VAULT OK - cooking directory {DIR_ID} took "
f"10.00s and succeeded.\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 0, result.output
def test_vault_no_directory(requests_mock, mocker, mocked_time):
"""Tests with an empty storage"""
scenario = WebScenario()
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
mocker.patch(f"{__name__}.FakeStorage.directory_get_random", return_value=None)
result = invoke(
[
+ "--prometheus-exporter",
+ "--prometheus-exporter-directory",
+ "/tmp",
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == ("VAULT CRITICAL - No directory exists in the archive.\n")
assert result.exit_code == 2, result.output
def test_vault_fetch_failed(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step(
"get",
url_fetch,
"",
status_code=500,
headers={"Content-Type": "application/gzip"},
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
f"VAULT CRITICAL - cooking directory {DIR_ID} took "
f"10.00s and succeeded, but fetch failed with status code 500.\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_fetch_missing_content_type(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step("get", url_fetch, "")
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
"VAULT CRITICAL - Unexpected Content-Type when downloading bundle: None\n"
"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_corrupt_tarball_gzip(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step(
"get",
url_fetch,
b"this-is-not-a-tarball",
headers={"Content-Type": "application/gzip"},
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
"VAULT CRITICAL - ReadError while reading tarball: not a gzip file\n"
"| 'total_time' = 20.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_corrupt_tarball_member(requests_mock, mocker, mocked_time):
fd = io.BytesIO()
with tarfile.open(fileobj=fd, mode="w:gz") as tf:
tf.addfile(tarfile.TarInfo("wrong_dir_name/README"), b"this is a readme\n")
tarball = fd.getvalue()
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step(
- "get", url_fetch, tarball, headers={"Content-Type": "application/gzip"},
+ "get",
+ url_fetch,
+ tarball,
+ headers={"Content-Type": "application/gzip"},
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
"VAULT CRITICAL - Unexpected member in tarball: wrong_dir_name/README\n"
"| 'total_time' = 20.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_empty_tarball(requests_mock, mocker, mocked_time):
fd = io.BytesIO()
with tarfile.open(fileobj=fd, mode="w:gz"):
pass
tarball = fd.getvalue()
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_pending)
scenario.add_step("get", url_api, response_done)
scenario.add_step(
- "get", url_fetch, tarball, headers={"Content-Type": "application/gzip"},
+ "get",
+ url_fetch,
+ tarball,
+ headers={"Content-Type": "application/gzip"},
)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
# This error message will need to be updated when https://bugs.python.org/issue46922
# is resolved.
assert result.output == (
"VAULT CRITICAL - StreamError while reading tarball (empty file?): "
"seeking backwards is not allowed\n"
"| 'total_time' = 20.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_no_fetch_url(requests_mock, mocker, mocked_time):
scenario = WebScenario()
scenario.add_step("get", url_api, {}, status_code=404)
scenario.add_step("post", url_api, response_pending)
scenario.add_step("get", url_api, response_done_no_fetch)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
f"VAULT CRITICAL - cooking directory {DIR_ID} took 10.00s and succeeded, "
f"but API response did not contain a fetch_url.\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 2, result.output
diff --git a/swh/icinga_plugins/tests/web_scenario.py b/swh/icinga_plugins/tests/web_scenario.py
index 18a7e90..454bb47 100644
--- a/swh/icinga_plugins/tests/web_scenario.py
+++ b/swh/icinga_plugins/tests/web_scenario.py
@@ -1,94 +1,96 @@
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Wrapper around requests-mock to mock successive responses
from a web service.
Tests can build successive steps by calling :py:meth:`WebScenario.add_step`
with specifications of what endpoints should be called and in what order."""
import dataclasses
import json
from typing import Callable, Dict, List, Optional, Set, Union
import requests_mock
@dataclasses.dataclass(frozen=True)
class Step:
expected_method: str
expected_url: str
response: Union[str, bytes, Dict, List]
status_code: int = 200
headers: Dict[str, str] = dataclasses.field(default_factory=dict)
    callback: Optional[Callable[[], None]] = None
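    # callback, when set, runs just before the mocked response is returned;
    # tests use it to simulate a slow endpoint (e.g. a long time.sleep).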
@dataclasses.dataclass(frozen=True)
class Endpoint:
method: str
url: str
class WebScenario:
"""Stores the state of the successive calls to the web service
expected by tests."""
_steps: List[Step]
_endpoints: Set[Endpoint]
_current_step: int
def __init__(self):
self._steps = []
self._endpoints = set()
self._current_step = 0
def add_endpoint(self, *args, **kwargs):
"""Adds an endpoint to be mocked.
        Arguments are the same as :py:class:`Endpoint`.
"""
self._endpoints.add(Endpoint(*args, **kwargs))
def add_step(self, *args, **kwargs):
"""Adds an expected call to the list of expected calls.
Also automatically calls :py:meth:`add_endpoint` so the
associated endpoint is mocked.
Arguments are the same as :py:class:`Step`.
"""
step = Step(*args, **kwargs)
self._steps.append(step)
self.add_endpoint(step.expected_method, step.expected_url)
def install_mock(self, mocker: requests_mock.Mocker):
"""Mocks entrypoints registered with :py:meth:`add_endpoint`
(or :py:meth:`add_step`) using the provided mocker.
"""
for endpoint in self._endpoints:
mocker.register_uri(
- endpoint.method.upper(), endpoint.url, content=self._request_callback,
+ endpoint.method.upper(),
+ endpoint.url,
+ content=self._request_callback,
)
def _request_callback(self, request, context):
step = self._steps[self._current_step]
assert request.url == step.expected_url
assert request.method.upper() == step.expected_method.upper()
self._current_step += 1
context.status_code = step.status_code
context.headers.update(step.headers)
if step.callback:
step.callback()
if isinstance(step.response, str):
return step.response.encode()
elif isinstance(step.response, bytes):
return step.response
else:
return json.dumps(step.response).encode()
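# Usage sketch (hypothetical endpoint and payloads): steps are consumed
# strictly in the order they were added, one per incoming request:
#
#     scenario = WebScenario()
#     scenario.add_step("get", "mock://api/x/", {"status": "pending"})
#     scenario.add_step("get", "mock://api/x/", {"status": "done"})
#     scenario.install_mock(requests_mock)  # pytest's requests_mock fixture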
diff --git a/swh/icinga_plugins/vault.py b/swh/icinga_plugins/vault.py
index 25d8693..8265920 100644
--- a/swh/icinga_plugins/vault.py
+++ b/swh/icinga_plugins/vault.py
@@ -1,174 +1,213 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import tarfile
import time
+from typing import List
import requests
from swh.storage import get_storage
from .base_check import BaseCheck
class NoDirectory(Exception):
pass
class VaultCheck(BaseCheck):
TYPE = "VAULT"
DEFAULT_WARNING_THRESHOLD = 0
DEFAULT_CRITICAL_THRESHOLD = 3600
def __init__(self, obj):
- super().__init__(obj)
+ super().__init__(obj, application="vault")
self._swh_storage = get_storage("remote", url=obj["swh_storage_url"])
self._swh_web_url = obj["swh_web_url"]
self._poll_interval = obj["poll_interval"]
+ self.register_prometheus_gauge("status", "")
+ self.register_prometheus_gauge("duration", "seconds", ["step", "status"])
+
def _url_for_dir(self, dir_id):
return self._swh_web_url + f"/api/1/vault/directory/{dir_id.hex()}/"
def _pick_directory(self):
dir_ = self._swh_storage.directory_get_random()
if dir_ is None:
raise NoDirectory()
return dir_
def _pick_uncached_directory(self):
while True:
dir_id = self._pick_directory()
response = requests.get(self._url_for_dir(dir_id))
if response.status_code == 404:
return dir_id
+ def _collect_prometheus_metrics(
+ self, status: int, duration: float, labels: List[str]
+ ) -> None:
+ self.collect_prometheus_metric("status", status)
+ self.collect_prometheus_metric(
+ "duration", duration, labels,
+ )
+
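+    # Each early-exit path in main() below returns 2 (CRITICAL) and records
+    # the same metrics with a (step, outcome) label pair pinpointing the
+    # failure stage.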
def main(self):
try:
dir_id = self._pick_uncached_directory()
except NoDirectory:
self.print_result("CRITICAL", "No directory exists in the archive.")
return 2
start_time = time.time()
total_time = 0
response = requests.post(self._url_for_dir(dir_id))
assert response.status_code == 200, (response, response.text)
result = response.json()
while result["status"] in ("new", "pending"):
time.sleep(self._poll_interval)
response = requests.get(self._url_for_dir(dir_id))
assert response.status_code == 200, (response, response.text)
result = response.json()
total_time = time.time() - start_time
if total_time > self.critical_threshold:
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took more than "
f"{total_time:.2f}s and has status: "
f'{result["progress_message"]}',
total_time=total_time,
)
+
+ self._collect_prometheus_metrics(2, total_time, ["cooking", "timeout"])
+
return 2
if result["status"] == "failed":
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
f'and failed with: {result["progress_message"]}',
total_time=total_time,
)
+
+ self._collect_prometheus_metrics(2, total_time, ["cooking", "failed"])
+
return 2
elif result["status"] != "done":
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
f'and resulted in unknown status: {result["status"]}',
total_time=total_time,
)
+
+ self._collect_prometheus_metrics(2, total_time, ["cooking", "unknown"])
return 2
(status_code, status) = self.get_status(total_time)
if "fetch_url" not in result:
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
f"and succeeded, but API response did not contain a fetch_url.",
total_time=total_time,
)
+ self._collect_prometheus_metrics(2, total_time, ["fetch", "no_url"])
return 2
with requests.get(result["fetch_url"], stream=True) as fetch_response:
try:
fetch_response.raise_for_status()
except requests.HTTPError:
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
f"and succeeded, but fetch failed with status code "
f"{fetch_response.status_code}.",
total_time=total_time,
)
+ self._collect_prometheus_metrics(2, total_time, ["fetch", "error"])
return 2
content_type = fetch_response.headers.get("Content-Type")
if content_type != "application/gzip":
self.print_result(
"CRITICAL",
f"Unexpected Content-Type when downloading bundle: {content_type}",
total_time=total_time,
)
+ self._collect_prometheus_metrics(
+ 2, total_time, ["download", "unexpected_content_type"]
+ )
return 2
try:
with tarfile.open(fileobj=fetch_response.raw, mode="r|gz") as tf:
# Note that we are streaming the tarfile from the network,
# so we are allowed at most one pass on the tf object;
# and the sooner we close it the better.
# Fortunately, checking only the first member is good enough:
tarinfo = tf.next()
swhid = f"swh:1:dir:{dir_id.hex()}"
if tarinfo.name != swhid and not tarinfo.name.startswith(
f"{swhid}/"
):
self.print_result(
"CRITICAL",
f"Unexpected member in tarball: {tarinfo.name}",
total_time=total_time,
)
+ self._collect_prometheus_metrics(
+ 2, total_time, ["check", "archive_content"]
+ )
return 2
except tarfile.ReadError as e:
self.print_result(
"CRITICAL",
f"ReadError while reading tarball: {e}",
total_time=total_time,
)
+ self._collect_prometheus_metrics(
+ 2, total_time, ["check", "archive_content"]
+ )
return 2
except tarfile.StreamError as e:
if e.args[0] == "seeking backwards is not allowed":
# Probably https://bugs.python.org/issue46922
self.print_result(
"CRITICAL",
f"StreamError while reading tarball (empty file?): {e}",
total_time=total_time,
)
+ self._collect_prometheus_metrics(
+ 2, total_time, ["check", "archive_content"]
+ )
return 2
self.print_result(
"CRITICAL",
f"StreamError while reading tarball: {e}",
total_time=total_time,
)
+ self._collect_prometheus_metrics(
+ 2, total_time, ["check", "archive_content"]
+ )
return 2
self.print_result(
status,
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
f"and succeeded.",
total_time=total_time,
)
+
+ self._collect_prometheus_metrics(status_code, total_time, ["end", ""])
return status_code
diff --git a/tox.ini b/tox.ini
index 601a983..9b44907 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,73 +1,74 @@
[tox]
envlist=black,flake8,mypy,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
commands =
pytest --doctest-modules \
{envsitepackagesdir}/swh/icinga_plugins \
--cov={envsitepackagesdir}/swh/icinga_plugins \
--cov-branch {posargs}
[testenv:black]
skip_install = true
deps =
- black==19.10b0
+ black==22.3.0
commands =
{envpython} -m black --check swh
[testenv:flake8]
skip_install = true
deps =
- flake8
+ flake8==4.0.1
+ flake8-bugbear==22.3.23
commands =
{envpython} -m flake8
[testenv:mypy]
skip_install = true
deps =
.[testing]
- mypy==0.920
+ mypy==0.942
commands =
mypy swh
# build documentation outside swh-environment using the current
# git HEAD of swh-docs, is executed on CI for each diff to prevent
# breaking doc build
[testenv:sphinx]
whitelist_externals = make
usedevelop = true
extras =
testing
deps =
# fetch and install swh-docs in develop mode
-e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs
# build documentation only inside swh-environment using local state
# of swh-docs package
[testenv:sphinx-dev]
whitelist_externals = make
usedevelop = true
extras =
testing
deps =
# install swh-docs in develop mode
-e ../swh-docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs