diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3111d72..05398bb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,37 +1,42 @@
repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v2.4.0
- hooks:
- - id: trailing-whitespace
- - id: check-json
- - id: check-yaml
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.1.0
+ hooks:
+ - id: trailing-whitespace
+ - id: check-json
+ - id: check-yaml
-- repo: https://gitlab.com/pycqa/flake8
- rev: 3.8.3
- hooks:
- - id: flake8
+ - repo: https://gitlab.com/pycqa/flake8
+ rev: 4.0.1
+ hooks:
+ - id: flake8
-- repo: https://github.com/codespell-project/codespell
- rev: v1.16.0
- hooks:
- - id: codespell
+ - repo: https://github.com/codespell-project/codespell
+ rev: v2.1.0
+ hooks:
+ - id: codespell
+ name: Check source code spelling
+ stages: [commit]
+ - id: codespell
+ name: Check commit message spelling
+ stages: [commit-msg]
-- repo: local
- hooks:
- - id: mypy
- name: mypy
- entry: mypy
- args: [swh]
- pass_filenames: false
- language: system
- types: [python]
+ - repo: local
+ hooks:
+ - id: mypy
+ name: mypy
+ entry: mypy
+ args: [swh]
+ pass_filenames: false
+ language: system
+ types: [python]
-- repo: https://github.com/PyCQA/isort
- rev: 5.5.2
- hooks:
- - id: isort
+ - repo: https://github.com/PyCQA/isort
+ rev: 5.10.1
+ hooks:
+ - id: isort
-- repo: https://github.com/python/black
- rev: 19.10b0
- hooks:
- - id: black
+ - repo: https://github.com/python/black
+ rev: 19.10b0
+ hooks:
+ - id: black
diff --git a/PKG-INFO b/PKG-INFO
index 87bb719..d173898 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,29 +1,29 @@
Metadata-Version: 2.1
Name: swh.icinga_plugins
-Version: 0.3.2
+Version: 0.4.0
Summary: Icinga plugins for Software Heritage infrastructure monitoring
Home-page: https://forge.softwareheritage.org/diffusion/swh-icinga-plugins
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-icinga-plugins
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 3 - Alpha
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
License-File: LICENSE
License-File: AUTHORS
swh-icinga-plugins
==================
Scripts for end-to-end monitoring of the SWH infrastructure
diff --git a/requirements-test.txt b/requirements-test.txt
index e470c85..888161b 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,7 +1,7 @@
-pytest
+pytest < 7.0.0 # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use
pytest-mock
requests-mock
types-click
types-requests
types-python-dateutil
types-PyYAML
diff --git a/swh.icinga_plugins.egg-info/PKG-INFO b/swh.icinga_plugins.egg-info/PKG-INFO
index 36c78fd..81453f7 100644
--- a/swh.icinga_plugins.egg-info/PKG-INFO
+++ b/swh.icinga_plugins.egg-info/PKG-INFO
@@ -1,29 +1,29 @@
Metadata-Version: 2.1
Name: swh.icinga-plugins
-Version: 0.3.2
+Version: 0.4.0
Summary: Icinga plugins for Software Heritage infrastructure monitoring
Home-page: https://forge.softwareheritage.org/diffusion/swh-icinga-plugins
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-icinga-plugins
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 3 - Alpha
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
License-File: LICENSE
License-File: AUTHORS
swh-icinga-plugins
==================
Scripts for end-to-end monitoring of the SWH infrastructure
diff --git a/swh.icinga_plugins.egg-info/entry_points.txt b/swh.icinga_plugins.egg-info/entry_points.txt
index 1d44810..7db0b62 100644
--- a/swh.icinga_plugins.egg-info/entry_points.txt
+++ b/swh.icinga_plugins.egg-info/entry_points.txt
@@ -1,4 +1,2 @@
-
- [swh.cli.subcommands]
- icinga_plugins=swh.icinga_plugins.cli
-
\ No newline at end of file
+[swh.cli.subcommands]
+icinga_plugins = swh.icinga_plugins.cli
diff --git a/swh.icinga_plugins.egg-info/requires.txt b/swh.icinga_plugins.egg-info/requires.txt
index f3113ea..eb46405 100644
--- a/swh.icinga_plugins.egg-info/requires.txt
+++ b/swh.icinga_plugins.egg-info/requires.txt
@@ -1,15 +1,15 @@
click
psycopg2
requests
swh.core[http]>=0.3
swh.deposit>=0.3
swh.storage>=0.0.162
[testing]
-pytest
+pytest<7.0.0
pytest-mock
requests-mock
types-click
types-requests
types-python-dateutil
types-PyYAML
diff --git a/swh/__init__.py b/swh/__init__.py
index 8d9f151..b36383a 100644
--- a/swh/__init__.py
+++ b/swh/__init__.py
@@ -1,4 +1,3 @@
from pkgutil import extend_path
-from typing import List
-__path__: List[str] = extend_path(__path__, __name__)
+__path__ = extend_path(__path__, __name__)
diff --git a/swh/icinga_plugins/base_check.py b/swh/icinga_plugins/base_check.py
index ce60aed..7110a9e 100644
--- a/swh/icinga_plugins/base_check.py
+++ b/swh/icinga_plugins/base_check.py
@@ -1,27 +1,32 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Dict
+
class BaseCheck:
- def __init__(self, obj):
- self.warning_threshold = obj.get(
- "warning_threshold", self.DEFAULT_WARNING_THRESHOLD
+ DEFAULT_WARNING_THRESHOLD = 60
+ DEFAULT_CRITICAL_THRESHOLD = 120
+
+ def __init__(self, obj: Dict[str, str]) -> None:
+ self.warning_threshold = float(
+ obj.get("warning_threshold", self.DEFAULT_WARNING_THRESHOLD)
)
- self.critical_threshold = obj.get(
- "critical_threshold", self.DEFAULT_CRITICAL_THRESHOLD
+ self.critical_threshold = float(
+ obj.get("critical_threshold", self.DEFAULT_CRITICAL_THRESHOLD)
)
def get_status(self, value):
if self.critical_threshold and value >= self.critical_threshold:
return (2, "CRITICAL")
elif self.warning_threshold and value >= self.warning_threshold:
return (1, "WARNING")
else:
return (0, "OK")
def print_result(self, status_type, status_string, **metrics):
print(f"{self.TYPE} {status_type} - {status_string}")
for (metric_name, metric_value) in sorted(metrics.items()):
print(f"| '{metric_name}' = {metric_value:.2f}s")
diff --git a/swh/icinga_plugins/cli.py b/swh/icinga_plugins/cli.py
index a98906f..b9412ae 100644
--- a/swh/icinga_plugins/cli.py
+++ b/swh/icinga_plugins/cli.py
@@ -1,135 +1,147 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
import sys
import click
from swh.core.cli import CONTEXT_SETTINGS
from swh.core.cli import swh as swh_cli_group
@swh_cli_group.group(name="icinga_plugins", context_settings=CONTEXT_SETTINGS)
@click.option("-w", "--warning", type=int, help="Warning threshold.")
@click.option("-c", "--critical", type=int, help="Critical threshold.")
@click.pass_context
def icinga_cli_group(ctx, warning, critical):
"""Main command for Icinga plugins
"""
ctx.ensure_object(dict)
if warning:
ctx.obj["warning_threshold"] = int(warning)
if critical:
ctx.obj["critical_threshold"] = int(critical)
@icinga_cli_group.group(name="check-vault")
@click.option(
"--swh-storage-url", type=str, required=True, help="URL to an swh-storage HTTP API"
)
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for cooking status.",
)
@click.pass_context
def check_vault(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_vault.command(name="directory")
@click.pass_context
def check_vault_directory(ctx):
"""Picks a random directory, requests its cooking via swh-web,
and waits for completion."""
from .vault import VaultCheck
sys.exit(VaultCheck(ctx.obj).main())
@icinga_cli_group.group(name="check-savecodenow")
@click.option(
"--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for save code now status.",
)
@click.pass_context
def check_scn(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_scn.command(name="origin")
@click.argument("origin", type=str)
@click.option("--visit-type", type=str, required=True, help="Visit type for origin")
@click.pass_context
def check_scn_origin(ctx, origin, visit_type):
"""Requests a save code now via the api for a given origin with type visit_type, waits
for its completion, report approximate time of completion (failed or succeeded) and
warn if threshold exceeded.
"""
from .save_code_now import SaveCodeNowCheck
sys.exit(SaveCodeNowCheck(ctx.obj, origin, visit_type).main())
@icinga_cli_group.group(name="check-deposit")
@click.option(
"--server",
type=str,
default="https://deposit.softwareheritage.org/1",
help="URL to the SWORD server to test",
)
+@click.option(
+ "--provider-url",
+ type=str,
+ required=True,
+ help=(
+ "Root URL of the deposit client, as defined in the "
+ "'deposit_client.provider_url' column in the deposit DB"
+ ),
+)
@click.option("--username", type=str, required=True, help="Login for the SWORD server")
@click.option(
"--password", type=str, required=True, help="Password for the SWORD server"
)
@click.option(
"--collection",
type=str,
required=True,
help="Software collection to use on the SWORD server",
)
@click.option(
"--poll-interval",
type=int,
default=10,
help="Interval (in seconds) between two polls to the API, "
"to check for ingestion status.",
)
+@click.option(
+ "--swh-web-url", type=str, required=True, help="URL to an swh-web instance"
+)
@click.pass_context
def check_deposit(ctx, **kwargs):
ctx.obj.update(kwargs)
@check_deposit.command(name="single")
@click.option(
"--archive", type=click.Path(), required=True, help="Software artefact to upload"
)
@click.option(
"--metadata",
type=click.Path(),
required=True,
help="Metadata file for the software artefact.",
)
@click.pass_context
def check_deposit_single(ctx, **kwargs):
"""Checks the provided archive and metadata file and be deposited."""
from .deposit import DepositCheck
ctx.obj.update(kwargs)
sys.exit(DepositCheck(ctx.obj).main())
diff --git a/swh/icinga_plugins/deposit.py b/swh/icinga_plugins/deposit.py
index e7221ef..17c5a57 100644
--- a/swh/icinga_plugins/deposit.py
+++ b/swh/icinga_plugins/deposit.py
@@ -1,181 +1,255 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import sys
import time
from typing import Any, Dict, Optional
+import requests
+
from swh.deposit.client import PublicApiDepositClient
from .base_check import BaseCheck
class DepositCheck(BaseCheck):
TYPE = "DEPOSIT"
DEFAULT_WARNING_THRESHOLD = 120
DEFAULT_CRITICAL_THRESHOLD = 3600
def __init__(self, obj):
super().__init__(obj)
+ self.api_url = obj["swh_web_url"].rstrip("/")
self._poll_interval = obj["poll_interval"]
self._archive_path = obj["archive"]
self._metadata_path = obj["metadata"]
self._collection = obj["collection"]
self._slug: Optional[str] = None
+ self._provider_url = obj["provider_url"]
self._client = PublicApiDepositClient(
{
"url": obj["server"],
"auth": {"username": obj["username"], "password": obj["password"]},
}
)
def upload_deposit(self):
- slug = "check-deposit-%s" % datetime.datetime.now().isoformat()
+ slug = (
+ "check-deposit-%s"
+ % datetime.datetime.fromtimestamp(time.time()).isoformat()
+ )
result = self._client.deposit_create(
archive=self._archive_path,
metadata=self._metadata_path,
collection=self._collection,
in_progress=False,
slug=slug,
)
self._slug = slug
self._deposit_id = result["deposit_id"]
return result
def update_deposit_with_metadata(self) -> Dict[str, Any]:
"""Trigger a metadata update on the deposit once it's completed.
"""
deposit = self.get_deposit_status()
swhid = deposit["deposit_swh_id"]
assert deposit["deposit_id"] == self._deposit_id
# We can reuse the initial metadata file we already sent
return self._client.deposit_update(
self._collection,
self._deposit_id,
self._slug,
metadata=self._metadata_path,
swhid=swhid,
)
def get_deposit_status(self):
return self._client.deposit_status(
collection=self._collection, deposit_id=self._deposit_id
)
def wait_while_status(self, statuses, start_time, metrics, result):
while result["deposit_status"] in statuses:
metrics["total_time"] = time.time() - start_time
if metrics["total_time"] > self.critical_threshold:
self.print_result(
"CRITICAL",
f"Timed out while in status "
f'{result["deposit_status"]} '
f'({metrics["total_time"]}s seconds since deposit '
f"started)",
**metrics,
)
sys.exit(2)
time.sleep(self._poll_interval)
result = self.get_deposit_status()
return result
def main(self):
start_time = time.time()
+ start_datetime = datetime.datetime.fromtimestamp(
+ start_time, tz=datetime.timezone.utc
+ )
metrics = {}
# Upload the archive and metadata
result = self.upload_deposit()
metrics["upload_time"] = time.time() - start_time
# Wait for validation
result = self.wait_while_status(["deposited"], start_time, metrics, result)
metrics["total_time"] = time.time() - start_time
metrics["validation_time"] = metrics["total_time"] - metrics["upload_time"]
# Check validation succeeded
if result["deposit_status"] == "rejected":
self.print_result(
"CRITICAL",
f'Deposit was rejected: {result["deposit_status_detail"]}',
**metrics,
)
return 2
# Wait for loading
result = self.wait_while_status(
["verified", "loading"], start_time, metrics, result
)
metrics["total_time"] = time.time() - start_time
metrics["load_time"] = (
metrics["total_time"] - metrics["upload_time"] - metrics["validation_time"]
)
# Check loading succeeded
if result["deposit_status"] == "failed":
self.print_result(
"CRITICAL",
f'Deposit loading failed: {result["deposit_status_detail"]}',
**metrics,
)
return 2
# Check for unexpected status
if result["deposit_status"] != "done":
self.print_result(
"CRITICAL",
f'Deposit got unexpected status: {result["deposit_status"]} '
f'({result["deposit_status_detail"]})',
**metrics,
)
return 2
+ # Get the SWHID
+ if "deposit_swh_id" not in result:
+ # if the deposit succeeded immediately (which is rare), it does not
+ # contain the SWHID, so we need to re-fetch its status.
+ result = self.get_deposit_status()
+ if result.get("deposit_swh_id") is None:
+ self.print_result(
+ "CRITICAL",
+ f"'deposit_swh_id' missing from result: {result!r}",
+ **metrics,
+ )
+ return 2
+
+ swhid = result["deposit_swh_id"]
+
+ # Check for unexpected status
+ if result["deposit_status"] != "done":
+ self.print_result(
+ "CRITICAL",
+ f'Deposit status went from "done" to: {result["deposit_status"]} '
+ f'({result["deposit_status_detail"]})',
+ **metrics,
+ )
+ return 2
+
+ # Get metadata list from swh-web
+ metadata_objects = requests.get(
+ f"{self.api_url}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20{self._provider_url}"
+ ).json()
+ expected_origin = f"{self._provider_url}/{self._slug}"
+
+ # Filter out objects that were clearly not created by this deposit (ie. created
+ # before the deposit started, or that are from unrelated origins)
+ relevant_metadata_objects = [
+ d
+ for d in metadata_objects
+ if d.get("origin") == expected_origin
+ and datetime.datetime.fromisoformat(d["discovery_date"]) >= start_datetime
+ ]
+ if not relevant_metadata_objects:
+ self.print_result(
+ "CRITICAL",
+ f"No recent metadata on {swhid} with origin {expected_origin} in: "
+ f"{metadata_objects!r}",
+ **metrics,
+ )
+ return 2
+
+ # Check the metadata was loaded as-is
+ metadata_url = relevant_metadata_objects[0]["metadata_url"]
+ metadata_file = requests.get(metadata_url).content
+ with open(self._metadata_path, "rb") as fd:
+ expected_metadata_file = fd.read()
+ if metadata_file != expected_metadata_file:
+ self.print_result(
+ "CRITICAL",
+ f"Metadata on {swhid} with origin {expected_origin} "
+ f"(at {metadata_url}) differs from uploaded Atom document "
+ f"(at {self._metadata_path})",
+ **metrics,
+ )
+ return 2
+
# Everything went fine, check total time wasn't too large and
# print result
(status_code, status) = self.get_status(metrics["total_time"])
self.print_result(
status,
f'Deposit took {metrics["total_time"]:.2f}s and succeeded.',
**metrics,
)
if status_code != 0: # Stop if any problem in the initial scenario
return status_code
# Initial deposit is now completed, now we can update the deposit with metadata
result = self.update_deposit_with_metadata()
total_time = time.time() - start_time
metrics_update = {
"total_time": total_time,
"update_time": (
total_time
- metrics["upload_time"]
- metrics["validation_time"]
- metrics["load_time"]
),
}
if "error" in result:
self.print_result(
"CRITICAL",
f'Deposit Metadata update failed: {result["error"]} ',
**metrics_update,
)
return 2
(status_code, status) = self.get_status(metrics_update["total_time"])
self.print_result(
status,
f'Deposit Metadata update took {metrics_update["update_time"]:.2f}s '
"and succeeded.",
**metrics_update,
)
return status_code
diff --git a/swh/icinga_plugins/tests/test_deposit.py b/swh/icinga_plugins/tests/test_deposit.py
index a3bed1a..f25c0d2 100644
--- a/swh/icinga_plugins/tests/test_deposit.py
+++ b/swh/icinga_plugins/tests/test_deposit.py
@@ -1,618 +1,870 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import datetime
import io
import os
import tarfile
import time
from typing import Optional
import pytest
from swh.icinga_plugins.tests.utils import invoke
from .web_scenario import WebScenario
+POLL_INTERVAL = 10
+
BASE_URL = "http://swh-deposit.example.org/1"
+BASE_WEB_URL = "mock://swh-web.example.org"
+PROVIDER_URL = "http://icinga-checker.example.org"
COMMON_OPTIONS = [
"--server",
BASE_URL,
"--username",
"test",
"--password",
"test",
"--collection",
"testcol",
+ "--swh-web-url",
+ BASE_WEB_URL,
+ "--provider-url",
+ PROVIDER_URL,
]
SAMPLE_METADATA = """
Test Software
swh
test-software
No One
"""
ENTRY_TEMPLATE = """
42
2019-12-19 18:11:00
foo.tar.gz
{status}
http://purl.org/net/sword/package/SimpleZip
"""
STATUS_TEMPLATE = """
42
{status}
{status_detail}%s
"""
+def compute_origin():
+ # This is the same origin the checker would compute, because we mock time.time
+ # to be constant until time.sleep is called
+ return (
+ PROVIDER_URL
+ + "/check-deposit-%s" % datetime.datetime.fromtimestamp(time.time()).isoformat()
+ )
+
+
def status_template(
status: str, status_detail: str = "", swhid: Optional[str] = None
) -> str:
"""Generate a proper status template out of status, status_detail and optional swhid
"""
if swhid is not None:
template = (
STATUS_TEMPLATE % f"\n {swhid}"
)
return template.format(status=status, status_detail=status_detail, swhid=swhid)
template = STATUS_TEMPLATE % ""
return template.format(status=status, status_detail=status_detail)
def test_status_template():
actual_status = status_template(status="deposited")
assert (
actual_status
== """
42
deposited
"""
)
actual_status = status_template(status="verified", status_detail="detail")
assert (
actual_status
== """
42
verified
detail
"""
)
- actual_status = status_template(status="done", swhid="10")
+ actual_status = status_template(
+ status="done", swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ )
assert (
actual_status
== """
42
done
- 10
+ swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74
"""
)
@pytest.fixture(scope="session")
def tmp_path(tmp_path_factory):
return tmp_path_factory.mktemp(__name__)
@pytest.fixture(scope="session")
def sample_metadata(tmp_path):
"""Returns a sample metadata file's path
"""
path = os.path.join(tmp_path, "metadata.xml")
with open(path, "w") as fd:
fd.write(SAMPLE_METADATA)
return path
@pytest.fixture(scope="session")
def sample_archive(tmp_path):
"""Returns a sample archive's path
"""
path = os.path.join(tmp_path, "archive.tar.gz")
with tarfile.open(path, "w:gz") as tf:
tf.addfile(tarfile.TarInfo("hello.py"), io.BytesIO(b'print("Hello world")'))
return path
def test_deposit_immediate_success(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
"""Both deposit creation and deposit metadata update passed without delays
"""
+ origin = compute_origin()
scenario = WebScenario()
status_xml = status_template(
status="done",
status_detail="",
swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
)
# Initial deposit
scenario.add_step(
- "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="done")
+ "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="done"),
)
- # Then metadata update
- status_xml = status_template(
- status="done",
- status_detail="",
- swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
+
+ # Checker gets the SWHID
+ swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
+
+ # Then the checker checks the metadata appeared on the website
+ scenario.add_step(
+ "get",
+ f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20http://icinga-checker.example.org",
+ [
+ {
+ "swhid": swhid,
+ "origin": origin,
+ "discovery_date": "2999-03-03T10:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ }
+ ],
)
+ scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
+
+ # Then metadata update
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# internal deposit client does call status, then update metadata then status api
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_xml,
)
scenario.add_step(
"put", f"{BASE_URL}/testcol/42/atom/", status_xml,
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_xml,
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
]
)
assert result.output == (
"DEPOSIT OK - Deposit took 0.00s and succeeded.\n"
"| 'load_time' = 0.00s\n"
"| 'total_time' = 0.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 0.00s\n"
"DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n"
"| 'total_time' = 0.00s\n"
"| 'update_time' = 0.00s\n"
)
assert result.exit_code == 0, f"Unexpected output: {result.output}"
def test_deposit_delays(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
"""Deposit creation passed with some delays, deposit metadata update passed without
delay
"""
+ origin = compute_origin()
+
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
)
+
+ # Deposit done, checker gets the SWHID
+ swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
+
+ # Then the checker checks the metadata appeared on the website
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="done"),
+ "get",
+ f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20http://icinga-checker.example.org",
+ [
+ {
+ "swhid": swhid,
+ "origin": origin,
+ "discovery_date": "2999-03-03T10:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ }
+ ],
)
+ scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
+
# Then metadata update
- status_xml = status_template(
- status="done",
- status_detail="",
- swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
- )
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
# internal deposit client does call status, then update metadata then status api
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_xml,
)
scenario.add_step(
"put", f"{BASE_URL}/testcol/42/atom/", status_xml,
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_xml,
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
]
)
assert result.output == (
"DEPOSIT OK - Deposit took 30.00s and succeeded.\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
"DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n"
"| 'total_time' = 30.00s\n"
"| 'update_time' = 0.00s\n"
)
assert result.exit_code == 0, f"Unexpected output: {result.output}"
def test_deposit_then_metadata_update_failed(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
"""Deposit creation passed, deposit metadata update failed
"""
+ origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
)
+
+ # Deposit done, checker gets the SWHID
+ swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
+
+ # Then the checker checks the metadata appeared on the website
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="done"),
+ "get",
+ f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20http://icinga-checker.example.org",
+ [
+ {
+ "swhid": swhid,
+ "origin": origin,
+ "discovery_date": "2999-03-03T10:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ }
+ ],
)
+ scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
+
# Then metadata update calls
failed_status_xml = status_template(
status="failed", # lying here
status_detail="Failure to ingest",
swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74",
)
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml)
scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT OK - Deposit took 30.00s and succeeded.\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
"DEPOSIT CRITICAL - Deposit Metadata update failed: You can only update "
"metadata on deposit with status 'done' \n"
"| 'total_time' = 30.00s\n"
"| 'update_time' = 0.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_delay_warning(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
"""Deposit creation exceeded delays, no deposit update occurred.
"""
+ origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
)
+
+ # Deposit done, checker gets the SWHID
+ swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml)
+
+ # Then the checker checks the metadata appeared on the website
scenario.add_step(
- "get", f"{BASE_URL}/testcol/42/status/", status_template(status="done"),
+ "get",
+ f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20http://icinga-checker.example.org",
+ [
+ {
+ "swhid": swhid,
+ "origin": origin,
+ "discovery_date": "2999-03-03T10:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ }
+ ],
)
+ scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
scenario.install_mock(requests_mock)
result = invoke(
[
"--warning",
"15",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT WARNING - Deposit took 20.00s and succeeded.\n"
"| 'load_time' = 10.00s\n"
"| 'total_time' = 20.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 1, f"Unexpected output: {result.output}"
def test_deposit_delay_critical(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
+ origin = compute_origin()
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
)
+
+ # Deposit done, checker gets the SWHID
+ swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ status_xml = status_template(status="done", status_detail="", swhid=swhid,)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
- status_template(status="done"),
+ status_xml,
callback=lambda: time.sleep(60),
)
+ # Then the checker checks the metadata appeared on the website
+ scenario.add_step(
+ "get",
+ f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20http://icinga-checker.example.org",
+ [
+ {
+ "swhid": swhid,
+ "origin": origin,
+ "discovery_date": "2999-03-03T10:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ }
+ ],
+ )
+ scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA)
+
scenario.install_mock(requests_mock)
result = invoke(
[
"--critical",
"50",
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit took 80.00s and succeeded.\n"
"| 'load_time' = 70.00s\n"
"| 'total_time' = 80.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_timeout(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post",
f"{BASE_URL}/testcol/",
ENTRY_TEMPLATE.format(status="deposited"),
callback=lambda: time.sleep(1500),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="verified"),
callback=lambda: time.sleep(1500),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="loading"),
callback=lambda: time.sleep(1500),
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Timed out while in status loading "
"(4520.0s seconds since deposit started)\n"
"| 'total_time' = 4520.00s\n"
"| 'upload_time' = 1500.00s\n"
"| 'validation_time' = 1510.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
+def test_deposit_metadata_missing(
+ requests_mock, mocker, sample_archive, sample_metadata, mocked_time
+):
+ origin = compute_origin()
+ scenario = WebScenario()
+
+ scenario.add_step(
+ "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
+ )
+ scenario.add_step(
+ "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ )
+
+ # Deposit done, checker gets the SWHID
+ swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ scenario.add_step(
+ "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ )
+
+ # Then the checker checks the metadata appeared on the website
+ metadata_list = [
+ {
+ # Filtered out, because wrong origin
+ "swhid": swhid,
+ "origin": "http://wrong-origin.example.org",
+ "discovery_date": "2999-03-03T10:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ },
+ {
+ # Filtered out, because too old
+ "swhid": swhid,
+ "origin": origin,
+ "discovery_date": "2022-03-03T09:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ },
+ ]
+ scenario.add_step(
+ "get",
+ f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20http://icinga-checker.example.org",
+ metadata_list,
+ )
+
+ scenario.install_mock(requests_mock)
+
+ result = invoke(
+ [
+ "check-deposit",
+ *COMMON_OPTIONS,
+ "single",
+ "--archive",
+ sample_archive,
+ "--metadata",
+ sample_metadata,
+ ],
+ catch_exceptions=True,
+ )
+
+ assert result.output == (
+ f"DEPOSIT CRITICAL - No recent metadata on {swhid} with origin {origin} in: "
+ f"{metadata_list!r}\n"
+ "| 'load_time' = 10.00s\n"
+ "| 'total_time' = 20.00s\n"
+ "| 'upload_time' = 0.00s\n"
+ "| 'validation_time' = 10.00s\n"
+ )
+ assert result.exit_code == 2, f"Unexpected output: {result.output}"
+
+
+def test_deposit_metadata_corrupt(
+ requests_mock, mocker, sample_archive, sample_metadata, mocked_time
+):
+ origin = compute_origin()
+ scenario = WebScenario()
+
+ scenario.add_step(
+ "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
+ )
+ scenario.add_step(
+ "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
+ )
+
+ # Deposit done, checker gets the SWHID
+ swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74"
+ status_xml = status_template(status="done", status_detail="", swhid=swhid,)
+ scenario.add_step(
+ "get", f"{BASE_URL}/testcol/42/status/", status_xml,
+ )
+
+ # Then the checker checks the metadata appeared on the website
+ metadata_list = [
+ {
+ "swhid": swhid,
+ "origin": origin,
+ "discovery_date": "2999-03-03T09:48:47+00:00",
+ "metadata_url": f"{BASE_WEB_URL}/the-metadata-url",
+ },
+ ]
+ scenario.add_step(
+ "get",
+ f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/"
+ f"?authority=deposit_client%20http://icinga-checker.example.org",
+ metadata_list,
+ )
+ scenario.add_step(
+ "get",
+ f"{BASE_WEB_URL}/the-metadata-url",
+ SAMPLE_METADATA[0:-1], # corrupting the metadata by dropping the last byte
+ )
+
+ scenario.install_mock(requests_mock)
+
+ result = invoke(
+ [
+ "check-deposit",
+ *COMMON_OPTIONS,
+ "single",
+ "--archive",
+ sample_archive,
+ "--metadata",
+ sample_metadata,
+ ],
+ catch_exceptions=True,
+ )
+
+ assert result.output == (
+ f"DEPOSIT CRITICAL - Metadata on {swhid} with origin {origin} (at "
+ f"{BASE_WEB_URL}/the-metadata-url) differs from uploaded Atom document (at "
+ f"{sample_metadata})\n"
+ "| 'load_time' = 10.00s\n"
+ "| 'total_time' = 20.00s\n"
+ "| 'upload_time' = 0.00s\n"
+ "| 'validation_time' = 10.00s\n"
+ )
+ assert result.exit_code == 2, f"Unexpected output: {result.output}"
+
+
def test_deposit_rejected(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="rejected", status_detail="booo"),
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit was rejected: booo\n"
"| 'total_time' = 10.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_failed(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="failed", status_detail="booo"),
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit loading failed: booo\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
def test_deposit_unexpected_status(
requests_mock, mocker, sample_archive, sample_metadata, mocked_time
):
scenario = WebScenario()
scenario.add_step(
"post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited")
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"),
)
scenario.add_step(
"get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"),
)
scenario.add_step(
"get",
f"{BASE_URL}/testcol/42/status/",
status_template(status="what", status_detail="booo"),
)
scenario.install_mock(requests_mock)
result = invoke(
[
"check-deposit",
*COMMON_OPTIONS,
"single",
"--archive",
sample_archive,
"--metadata",
sample_metadata,
],
catch_exceptions=True,
)
assert result.output == (
"DEPOSIT CRITICAL - Deposit got unexpected status: what (booo)\n"
"| 'load_time' = 20.00s\n"
"| 'total_time' = 30.00s\n"
"| 'upload_time' = 0.00s\n"
"| 'validation_time' = 10.00s\n"
)
assert result.exit_code == 2, f"Unexpected output: {result.output}"
diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py
index bf578b8..04cece2 100644
--- a/swh/icinga_plugins/tests/test_vault.py
+++ b/swh/icinga_plugins/tests/test_vault.py
@@ -1,280 +1,489 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import io
+import tarfile
import time
from swh.icinga_plugins.tests.utils import invoke
from .web_scenario import WebScenario
-dir_id = "ab" * 20
+DIR_ID = "ab" * 20
+
+url_api = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/"
+url_fetch = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/raw/"
+
+
+def _make_tarfile():
+ fd = io.BytesIO()
+ with tarfile.open(fileobj=fd, mode="w:gz") as tf:
+        tf.addfile(tarfile.TarInfo(f"swh:1:dir:{DIR_ID}/README"), io.BytesIO(b"this is a readme\n"))
+
+ tarinfo = tarfile.TarInfo(f"swh:1:dir:{DIR_ID}")
+ tarinfo.type = tarfile.DIRTYPE
+ tf.addfile(tarinfo)
+ return fd.getvalue()
+
+
+TARBALL = _make_tarfile()
response_pending = {
- "obj_id": dir_id,
+ "obj_id": DIR_ID,
"obj_type": "directory",
"progress_message": "foo",
"status": "pending",
}
response_done = {
- "fetch_url": f"/api/1/vault/directory/{dir_id}/raw/",
+ "fetch_url": url_fetch,
"id": 9,
- "obj_id": dir_id,
+ "obj_id": DIR_ID,
+ "obj_type": "directory",
+ "status": "done",
+}
+
+response_done_no_fetch = {
+ "id": 9,
+ "obj_id": DIR_ID,
"obj_type": "directory",
"status": "done",
}
response_failed = {
- "obj_id": dir_id,
+ "obj_id": DIR_ID,
"obj_type": "directory",
"progress_message": "foobar",
"status": "failed",
}
response_unknown_status = {
- "obj_id": dir_id,
+ "obj_id": DIR_ID,
"obj_type": "directory",
"progress_message": "what",
"status": "boo",
}
class FakeStorage:
def __init__(self, foo, **kwargs):
pass
def directory_get_random(self):
- return bytes.fromhex(dir_id)
+ return bytes.fromhex(DIR_ID)
def test_vault_immediate_success(requests_mock, mocker, mocked_time):
scenario = WebScenario()
- url = f"mock://swh-web.example.org/api/1/vault/directory/{dir_id}/"
-
- scenario.add_step("get", url, {}, status_code=404)
- scenario.add_step("post", url, response_pending)
- scenario.add_step("get", url, response_done)
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done)
+ scenario.add_step(
+ "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
+ )
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
]
)
assert result.output == (
- f"VAULT OK - cooking directory {dir_id} took "
+ f"VAULT OK - cooking directory {DIR_ID} took "
f"10.00s and succeeded.\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 0, result.output
def test_vault_delayed_success(requests_mock, mocker, mocked_time):
scenario = WebScenario()
- url = f"mock://swh-web.example.org/api/1/vault/directory/{dir_id}/"
-
- scenario.add_step("get", url, {}, status_code=404)
- scenario.add_step("post", url, response_pending)
- scenario.add_step("get", url, response_pending)
- scenario.add_step("get", url, response_done)
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done)
+ scenario.add_step(
+ "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
+ )
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
]
)
assert result.output == (
- f"VAULT OK - cooking directory {dir_id} took "
+ f"VAULT OK - cooking directory {DIR_ID} took "
f"20.00s and succeeded.\n"
f"| 'total_time' = 20.00s\n"
)
assert result.exit_code == 0, result.output
def test_vault_failure(requests_mock, mocker, mocked_time):
scenario = WebScenario()
- url = f"mock://swh-web.example.org/api/1/vault/directory/{dir_id}/"
-
- scenario.add_step("get", url, {}, status_code=404)
- scenario.add_step("post", url, response_pending)
- scenario.add_step("get", url, response_failed)
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_failed)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
- f"VAULT CRITICAL - cooking directory {dir_id} took "
+ f"VAULT CRITICAL - cooking directory {DIR_ID} took "
f"10.00s and failed with: foobar\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_unknown_status(requests_mock, mocker, mocked_time):
scenario = WebScenario()
- url = f"mock://swh-web.example.org/api/1/vault/directory/{dir_id}/"
-
- scenario.add_step("get", url, {}, status_code=404)
- scenario.add_step("post", url, response_pending)
- scenario.add_step("get", url, response_unknown_status)
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_unknown_status)
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
- f"VAULT CRITICAL - cooking directory {dir_id} took "
+ f"VAULT CRITICAL - cooking directory {DIR_ID} took "
f"10.00s and resulted in unknown status: boo\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_timeout(requests_mock, mocker, mocked_time):
scenario = WebScenario()
- url = f"mock://swh-web.example.org/api/1/vault/directory/{dir_id}/"
-
- scenario.add_step("get", url, {}, status_code=404)
- scenario.add_step("post", url, response_pending)
- scenario.add_step("get", url, response_pending)
- scenario.add_step("get", url, response_pending, callback=lambda: time.sleep(4000))
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_pending)
+ scenario.add_step(
+ "get", url_api, response_pending, callback=lambda: time.sleep(4000)
+ )
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == (
- f"VAULT CRITICAL - cooking directory {dir_id} took more than "
+ f"VAULT CRITICAL - cooking directory {DIR_ID} took more than "
f"4020.00s and has status: foo\n"
f"| 'total_time' = 4020.00s\n"
)
assert result.exit_code == 2, result.output
def test_vault_cached_directory(requests_mock, mocker, mocked_time):
"""First serves a directory that's already in the cache, to
test that vault_check requests another one."""
scenario = WebScenario()
- url = f"mock://swh-web.example.org/api/1/vault/directory/{dir_id}/"
-
- scenario.add_step("get", url, {}, status_code=200)
- scenario.add_step("get", url, {}, status_code=404)
- scenario.add_step("post", url, response_pending)
- scenario.add_step("get", url, response_done)
+ scenario.add_step("get", url_api, {}, status_code=200)
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done)
+ scenario.add_step(
+ "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"}
+ )
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
]
)
assert result.output == (
- f"VAULT OK - cooking directory {dir_id} took "
+ f"VAULT OK - cooking directory {DIR_ID} took "
f"10.00s and succeeded.\n"
f"| 'total_time' = 10.00s\n"
)
assert result.exit_code == 0, result.output
def test_vault_no_directory(requests_mock, mocker, mocked_time):
"""Tests with an empty storage"""
scenario = WebScenario()
scenario.install_mock(requests_mock)
get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
get_storage_mock.side_effect = FakeStorage
mocker.patch(f"{__name__}.FakeStorage.directory_get_random", return_value=None)
result = invoke(
[
"check-vault",
"--swh-web-url",
"mock://swh-web.example.org",
"--swh-storage-url",
"foo://example.org",
"directory",
],
catch_exceptions=True,
)
assert result.output == ("VAULT CRITICAL - No directory exists in the archive.\n")
assert result.exit_code == 2, result.output
+
+
+def test_vault_fetch_failed(requests_mock, mocker, mocked_time):
+ scenario = WebScenario()
+
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done)
+ scenario.add_step(
+ "get",
+ url_fetch,
+ "",
+ status_code=500,
+ headers={"Content-Type": "application/gzip"},
+ )
+
+ scenario.install_mock(requests_mock)
+
+ get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
+ get_storage_mock.side_effect = FakeStorage
+
+ result = invoke(
+ [
+ "check-vault",
+ "--swh-web-url",
+ "mock://swh-web.example.org",
+ "--swh-storage-url",
+ "foo://example.org",
+ "directory",
+ ],
+ catch_exceptions=True,
+ )
+
+ assert result.output == (
+ f"VAULT CRITICAL - cooking directory {DIR_ID} took "
+ f"10.00s and succeeded, but fetch failed with status code 500.\n"
+ f"| 'total_time' = 10.00s\n"
+ )
+ assert result.exit_code == 2, result.output
+
+
+def test_vault_fetch_missing_content_type(requests_mock, mocker, mocked_time):
+ scenario = WebScenario()
+
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done)
+ scenario.add_step("get", url_fetch, "")
+
+ scenario.install_mock(requests_mock)
+
+ get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
+ get_storage_mock.side_effect = FakeStorage
+
+ result = invoke(
+ [
+ "check-vault",
+ "--swh-web-url",
+ "mock://swh-web.example.org",
+ "--swh-storage-url",
+ "foo://example.org",
+ "directory",
+ ],
+ catch_exceptions=True,
+ )
+
+ assert result.output == (
+ "VAULT CRITICAL - Unexpected Content-Type when downloading bundle: None\n"
+ "| 'total_time' = 10.00s\n"
+ )
+ assert result.exit_code == 2, result.output
+
+
+def test_vault_corrupt_tarball_gzip(requests_mock, mocker, mocked_time):
+ scenario = WebScenario()
+
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done)
+ scenario.add_step(
+ "get",
+ url_fetch,
+ b"this-is-not-a-tarball",
+ headers={"Content-Type": "application/gzip", "Content-Length": "100000"},
+ )
+
+ scenario.install_mock(requests_mock)
+
+ get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
+ get_storage_mock.side_effect = FakeStorage
+
+ result = invoke(
+ [
+ "check-vault",
+ "--swh-web-url",
+ "mock://swh-web.example.org",
+ "--swh-storage-url",
+ "foo://example.org",
+ "directory",
+ ],
+ catch_exceptions=True,
+ )
+
+ assert result.output == (
+ "VAULT CRITICAL - Error while reading tarball: not a gzip file\n"
+ "| 'total_time' = 20.00s\n"
+ )
+ assert result.exit_code == 2, result.output
+
+
+def test_vault_corrupt_tarball_member(requests_mock, mocker, mocked_time):
+ fd = io.BytesIO()
+ with tarfile.open(fileobj=fd, mode="w:gz") as tf:
+        tf.addfile(tarfile.TarInfo("wrong_dir_name/README"), io.BytesIO(b"this is a readme\n"))
+ tarball = fd.getvalue()
+
+ scenario = WebScenario()
+
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done)
+ scenario.add_step(
+ "get",
+ url_fetch,
+ tarball,
+ headers={"Content-Type": "application/gzip", "Content-Length": "100000"},
+ )
+
+ scenario.install_mock(requests_mock)
+
+ get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
+ get_storage_mock.side_effect = FakeStorage
+
+ result = invoke(
+ [
+ "check-vault",
+ "--swh-web-url",
+ "mock://swh-web.example.org",
+ "--swh-storage-url",
+ "foo://example.org",
+ "directory",
+ ],
+ catch_exceptions=True,
+ )
+
+ assert result.output == (
+ "VAULT CRITICAL - Unexpected member in tarball: wrong_dir_name/README\n"
+ "| 'total_time' = 20.00s\n"
+ )
+ assert result.exit_code == 2, result.output
+
+
+def test_vault_no_fetch_url(requests_mock, mocker, mocked_time):
+ scenario = WebScenario()
+
+ scenario.add_step("get", url_api, {}, status_code=404)
+ scenario.add_step("post", url_api, response_pending)
+ scenario.add_step("get", url_api, response_done_no_fetch)
+
+ scenario.install_mock(requests_mock)
+
+ get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage")
+ get_storage_mock.side_effect = FakeStorage
+
+ result = invoke(
+ [
+ "check-vault",
+ "--swh-web-url",
+ "mock://swh-web.example.org",
+ "--swh-storage-url",
+ "foo://example.org",
+ "directory",
+ ],
+ catch_exceptions=True,
+ )
+
+ assert result.output == (
+ f"VAULT CRITICAL - cooking directory {DIR_ID} took 10.00s and succeeded, "
+ f"but API response did not contain a fetch_url.\n"
+ f"| 'total_time' = 10.00s\n"
+ )
+ assert result.exit_code == 2, result.output
diff --git a/swh/icinga_plugins/tests/web_scenario.py b/swh/icinga_plugins/tests/web_scenario.py
index e4225cb..18a7e90 100644
--- a/swh/icinga_plugins/tests/web_scenario.py
+++ b/swh/icinga_plugins/tests/web_scenario.py
@@ -1,92 +1,94 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Wrapper around requests-mock to mock successive responses
from a web service.
Tests can build successive steps by calling :py:meth:`WebScenario.add_step`
with specifications of what endpoints should be called and in what order."""
-from dataclasses import dataclass
+import dataclasses
import json
-from typing import Callable, List, Optional, Set
+from typing import Callable, Dict, List, Optional, Set, Union
import requests_mock
-@dataclass(frozen=True)
+@dataclasses.dataclass(frozen=True)
class Step:
expected_method: str
expected_url: str
- response: object
+ response: Union[str, bytes, Dict, List]
status_code: int = 200
+ headers: Dict[str, str] = dataclasses.field(default_factory=dict)
callback: Optional[Callable[[], int]] = None
-@dataclass(frozen=True)
+@dataclasses.dataclass(frozen=True)
class Endpoint:
method: str
url: str
class WebScenario:
"""Stores the state of the successive calls to the web service
expected by tests."""
_steps: List[Step]
_endpoints: Set[Endpoint]
_current_step: int
def __init__(self):
self._steps = []
self._endpoints = set()
self._current_step = 0
def add_endpoint(self, *args, **kwargs):
"""Adds an endpoint to be mocked.
Arguments are the same as :py:class:Endpoint.
"""
self._endpoints.add(Endpoint(*args, **kwargs))
def add_step(self, *args, **kwargs):
"""Adds an expected call to the list of expected calls.
Also automatically calls :py:meth:`add_endpoint` so the
associated endpoint is mocked.
Arguments are the same as :py:class:`Step`.
"""
step = Step(*args, **kwargs)
self._steps.append(step)
self.add_endpoint(step.expected_method, step.expected_url)
def install_mock(self, mocker: requests_mock.Mocker):
"""Mocks entrypoints registered with :py:meth:`add_endpoint`
(or :py:meth:`add_step`) using the provided mocker.
"""
for endpoint in self._endpoints:
mocker.register_uri(
- endpoint.method.upper(),
- endpoint.url,
- text=self._request_callback, # type: ignore # stubs are too strict
+ endpoint.method.upper(), endpoint.url, content=self._request_callback,
)
def _request_callback(self, request, context):
step = self._steps[self._current_step]
assert request.url == step.expected_url
assert request.method.upper() == step.expected_method.upper()
self._current_step += 1
context.status_code = step.status_code
+ context.headers.update(step.headers)
if step.callback:
step.callback()
if isinstance(step.response, str):
+ return step.response.encode()
+ elif isinstance(step.response, bytes):
return step.response
else:
- return json.dumps(step.response)
+ return json.dumps(step.response).encode()
diff --git a/swh/icinga_plugins/vault.py b/swh/icinga_plugins/vault.py
index 03d4d1c..3db33cc 100644
--- a/swh/icinga_plugins/vault.py
+++ b/swh/icinga_plugins/vault.py
@@ -1,100 +1,158 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import tarfile
import time
import requests
from swh.storage import get_storage
from .base_check import BaseCheck
class NoDirectory(Exception):
pass
class VaultCheck(BaseCheck):
TYPE = "VAULT"
DEFAULT_WARNING_THRESHOLD = 0
DEFAULT_CRITICAL_THRESHOLD = 3600
def __init__(self, obj):
super().__init__(obj)
self._swh_storage = get_storage("remote", url=obj["swh_storage_url"])
self._swh_web_url = obj["swh_web_url"]
self._poll_interval = obj["poll_interval"]
def _url_for_dir(self, dir_id):
return self._swh_web_url + f"/api/1/vault/directory/{dir_id.hex()}/"
def _pick_directory(self):
dir_ = self._swh_storage.directory_get_random()
if dir_ is None:
raise NoDirectory()
return dir_
def _pick_uncached_directory(self):
while True:
dir_id = self._pick_directory()
response = requests.get(self._url_for_dir(dir_id))
if response.status_code == 404:
return dir_id
def main(self):
try:
dir_id = self._pick_uncached_directory()
except NoDirectory:
self.print_result("CRITICAL", "No directory exists in the archive.")
return 2
start_time = time.time()
total_time = 0
response = requests.post(self._url_for_dir(dir_id))
assert response.status_code == 200, (response, response.text)
result = response.json()
while result["status"] in ("new", "pending"):
time.sleep(self._poll_interval)
response = requests.get(self._url_for_dir(dir_id))
assert response.status_code == 200, (response, response.text)
result = response.json()
total_time = time.time() - start_time
if total_time > self.critical_threshold:
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took more than "
f"{total_time:.2f}s and has status: "
f'{result["progress_message"]}',
total_time=total_time,
)
return 2
- if result["status"] == "done":
- (status_code, status) = self.get_status(total_time)
+ if result["status"] == "failed":
self.print_result(
- status,
+ "CRITICAL",
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
- f"and succeeded.",
+ f'and failed with: {result["progress_message"]}',
total_time=total_time,
)
- return status_code
- elif result["status"] == "failed":
+ return 2
+ elif result["status"] != "done":
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
- f'and failed with: {result["progress_message"]}',
+ f'and resulted in unknown status: {result["status"]}',
total_time=total_time,
)
return 2
- else:
+
+ (status_code, status) = self.get_status(total_time)
+
+ if "fetch_url" not in result:
self.print_result(
"CRITICAL",
f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
- f'and resulted in unknown status: {result["status"]}',
+ f"and succeeded, but API response did not contain a fetch_url.",
total_time=total_time,
)
return 2
+
+ with requests.get(result["fetch_url"], stream=True) as fetch_response:
+ try:
+ fetch_response.raise_for_status()
+ except requests.HTTPError:
+ self.print_result(
+ "CRITICAL",
+ f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
+ f"and succeeded, but fetch failed with status code "
+ f"{fetch_response.status_code}.",
+ total_time=total_time,
+ )
+ return 2
+
+ content_type = fetch_response.headers.get("Content-Type")
+ if content_type != "application/gzip":
+ self.print_result(
+ "CRITICAL",
+ f"Unexpected Content-Type when downloading bundle: {content_type}",
+ total_time=total_time,
+ )
+ return 2
+
+ try:
+            with tarfile.open(fileobj=fetch_response.raw, mode="r|gz") as tf:
+ # Note that we are streaming the tarfile from the network,
+ # so we are allowed at most one pass on the tf object;
+ # and the sooner we close it the better.
+ # Fortunately, checking only the first member is good enough:
+                tarinfo = tf.next()
+                swhid = f"swh:1:dir:{dir_id.hex()}"
+                # tf.next() returns None when the archive has no members
+                member = tarinfo.name if tarinfo is not None else "<empty tarball>"
+                if member != swhid and not member.startswith(f"{swhid}/"):
+                    self.print_result(
+                        "CRITICAL",
+                        f"Unexpected member in tarball: {member}",
+                        total_time=total_time,
+                    )
+                    return 2
+ except tarfile.ReadError as e:
+ self.print_result(
+ "CRITICAL",
+ f"Error while reading tarball: {e}",
+ total_time=total_time,
+ )
+ return 2
+
+ self.print_result(
+ status,
+ f"cooking directory {dir_id.hex()} took {total_time:.2f}s "
+ f"and succeeded.",
+ total_time=total_time,
+ )
+ return status_code
diff --git a/tox.ini b/tox.ini
index f9c01f6..601a983 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,73 +1,73 @@
[tox]
envlist=black,flake8,mypy,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
commands =
pytest --doctest-modules \
{envsitepackagesdir}/swh/icinga_plugins \
--cov={envsitepackagesdir}/swh/icinga_plugins \
--cov-branch {posargs}
[testenv:black]
skip_install = true
deps =
black==19.10b0
commands =
{envpython} -m black --check swh
[testenv:flake8]
skip_install = true
deps =
flake8
commands =
{envpython} -m flake8
[testenv:mypy]
skip_install = true
deps =
.[testing]
- mypy
+ mypy==0.920
commands =
mypy swh
# build documentation outside swh-environment using the current
# git HEAD of swh-docs, is executed on CI for each diff to prevent
# breaking doc build
[testenv:sphinx]
whitelist_externals = make
usedevelop = true
extras =
testing
deps =
# fetch and install swh-docs in develop mode
-e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs
# build documentation only inside swh-environment using local state
# of swh-docs package
[testenv:sphinx-dev]
whitelist_externals = make
usedevelop = true
extras =
testing
deps =
# install swh-docs in develop mode
-e ../swh-docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs