diff --git a/swh/icinga_plugins/cli.py b/swh/icinga_plugins/cli.py --- a/swh/icinga_plugins/cli.py +++ b/swh/icinga_plugins/cli.py @@ -94,6 +94,15 @@ default="https://deposit.softwareheritage.org/1", help="URL to the SWORD server to test", ) +@click.option( + "--provider-url", + type=str, + required=True, + help=( + "Root URL of the deposit client, as defined in the " + "'deposit_client.provider_url' column in the deposit DB" + ), +) @click.option("--username", type=str, required=True, help="Login for the SWORD server") @click.option( "--password", type=str, required=True, help="Password for the SWORD server" @@ -111,6 +120,9 @@ help="Interval (in seconds) between two polls to the API, " "to check for ingestion status.", ) +@click.option( + "--swh-web-url", type=str, required=True, help="URL to an swh-web instance" +) @click.pass_context def check_deposit(ctx, **kwargs): ctx.obj.update(kwargs) diff --git a/swh/icinga_plugins/deposit.py b/swh/icinga_plugins/deposit.py --- a/swh/icinga_plugins/deposit.py +++ b/swh/icinga_plugins/deposit.py @@ -8,6 +8,8 @@ import time from typing import Any, Dict, Optional +import requests + from swh.deposit.client import PublicApiDepositClient from .base_check import BaseCheck @@ -20,11 +22,13 @@ def __init__(self, obj): super().__init__(obj) + self.api_url = obj["swh_web_url"].rstrip("/") self._poll_interval = obj["poll_interval"] self._archive_path = obj["archive"] self._metadata_path = obj["metadata"] self._collection = obj["collection"] self._slug: Optional[str] = None + self._provider_url = obj["provider_url"] self._client = PublicApiDepositClient( { @@ -34,7 +38,10 @@ ) def upload_deposit(self): - slug = "check-deposit-%s" % datetime.datetime.now().isoformat() + slug = ( + "check-deposit-%s" + % datetime.datetime.fromtimestamp(time.time()).isoformat() + ) result = self._client.deposit_create( archive=self._archive_path, metadata=self._metadata_path, @@ -138,6 +145,47 @@ ) return 2 + # Get the SWHID + if "deposit_swh_id" not in result: + # if the deposit succeeded immediately (which is rare), it does not + # contain the SWHID, so we need to re-fetch its status. + result = self.get_deposit_status() + if result.get("deposit_swh_id") is None: + self.print_result( + "CRITICAL", + f"'deposit_swh_id' missing from result: {result!r}", + **metrics, + ) + return 2 + + swhid = result["deposit_swh_id"] + + # Check for unexpected status + if result["deposit_status"] != "done": + self.print_result( + "CRITICAL", + f'Deposit status went from "done" to: {result["deposit_status"]} ' + f'({result["deposit_status_detail"]})', + **metrics, + ) + return 2 + + # Get metadata from swh-web + metadata_objects = requests.get( + f"{self.api_url}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" + f"?authority=deposit_client%20{self._provider_url}" + ).json() + expected_origin = f"{self._provider_url}/{self._slug}" + origins = [d.get("origin") for d in metadata_objects] + if expected_origin not in origins: + self.print_result( + "CRITICAL", + f"Deposited metadata on {swhid} with origin {expected_origin}, " + f"missing from the list of origins: {origins!r}", + **metrics, + ) + return 2 + # Everything went fine, check total time wasn't too large and # print result (status_code, status) = self.get_status(metrics["total_time"]) diff --git a/swh/icinga_plugins/tests/test_deposit.py b/swh/icinga_plugins/tests/test_deposit.py --- a/swh/icinga_plugins/tests/test_deposit.py +++ b/swh/icinga_plugins/tests/test_deposit.py @@ -1,8 +1,9 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import datetime import io import os import tarfile @@ -15,7 +16,11 @@ from .web_scenario import WebScenario +POLL_INTERVAL = 10 + BASE_URL = "http://swh-deposit.example.org/1" +BASE_WEB_URL = "mock://swh-web.example.org" +PROVIDER_URL = "http://icinga-checker.example.org" COMMON_OPTIONS = [ "--server", @@ -26,6 +31,10 @@ "test", "--collection", "testcol", + "--swh-web-url", + BASE_WEB_URL, + "--provider-url", + PROVIDER_URL, ] @@ -70,6 +79,15 @@ """ +def compute_origin(): + # This is the same origin the checker would compute, because we mock time.time + # to be constant until time.sleep is called + return ( + PROVIDER_URL + + "/check-deposit-%s" % datetime.datetime.fromtimestamp(time.time()).isoformat() + ) + + def status_template( status: str, status_detail: str = "", swhid: Optional[str] = None ) -> str: @@ -116,7 +134,9 @@ """ ) - actual_status = status_template(status="done", swhid="10") + actual_status = status_template( + status="done", swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" + ) assert ( actual_status == """ @@ -127,7 +147,7 @@ 42 done - 10 + swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74 """ ) @@ -170,6 +190,7 @@ """Both deposit creation and deposit metadata update passed without delays """ + origin = compute_origin() scenario = WebScenario() status_xml = status_template( @@ -180,14 +201,23 @@ # Initial deposit scenario.add_step( - "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="done") + "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="done"), ) - # Then metadata update - status_xml = status_template( - status="done", - status_detail="", - swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74", + + # Checker gets the SWHID + swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" + status_xml = status_template(status="done", status_detail="", swhid=swhid,) + scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) + + # Then the checker checks the metadata appeared on the website + scenario.add_step( + "get", + f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" + f"?authority=deposit_client%20http://icinga-checker.example.org", + [{"swhid": swhid, "origin": origin}], ) + + # Then metadata update scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # internal deposit client does call status, then update metadata then status api scenario.add_step( @@ -234,6 +264,8 @@ delay """ + origin = compute_origin() + scenario = WebScenario() scenario.add_step( @@ -245,15 +277,21 @@ scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"), ) + + # Deposit done, checker gets the SWHID + swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" + status_xml = status_template(status="done", status_detail="", swhid=swhid,) + scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) + + # Then the checker checks the metadata appeared on the website scenario.add_step( - "get", f"{BASE_URL}/testcol/42/status/", status_template(status="done"), + "get", + f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" + f"?authority=deposit_client%20http://icinga-checker.example.org", + [{"swhid": swhid, "origin": origin}], ) + # Then metadata update - status_xml = status_template( - status="done", - status_detail="", - swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74", - ) scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # internal deposit client does call status, then update metadata then status api scenario.add_step( @@ -299,6 +337,7 @@ """Deposit creation passed, deposit metadata update failed """ + origin = compute_origin() scenario = WebScenario() scenario.add_step( @@ -310,9 +349,20 @@ scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"), ) + + # Deposit done, checker gets the SWHID + swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" + status_xml = status_template(status="done", status_detail="", swhid=swhid,) + scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) + + # Then the checker checks the metadata appeared on the website scenario.add_step( - "get", f"{BASE_URL}/testcol/42/status/", status_template(status="done"), + "get", + f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" + f"?authority=deposit_client%20http://icinga-checker.example.org", + [{"swhid": swhid, "origin": origin}], ) + # Then metadata update calls failed_status_xml = status_template( status="failed", # lying here @@ -357,6 +407,7 @@ """Deposit creation exceeded delays, no deposit update occurred. """ + origin = compute_origin() scenario = WebScenario() scenario.add_step( @@ -365,8 +416,18 @@ scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) + + # Deposit done, checker gets the SWHID + swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" + status_xml = status_template(status="done", status_detail="", swhid=swhid,) + scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) + + # Then the checker checks the metadata appeared on the website scenario.add_step( - "get", f"{BASE_URL}/testcol/42/status/", status_template(status="done"), + "get", + f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" + f"?authority=deposit_client%20http://icinga-checker.example.org", + [{"swhid": swhid, "origin": origin}], ) scenario.install_mock(requests_mock) @@ -399,6 +460,7 @@ def test_deposit_delay_critical( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): + origin = compute_origin() scenario = WebScenario() scenario.add_step( @@ -407,13 +469,25 @@ scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) + + # Deposit done, checker gets the SWHID + swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" + status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", - status_template(status="done"), + status_xml, callback=lambda: time.sleep(60), ) + # Then the checker checks the metadata appeared on the website + scenario.add_step( + "get", + f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" + f"?authority=deposit_client%20http://icinga-checker.example.org", + [{"swhid": swhid, "origin": origin}], + ) + scenario.install_mock(requests_mock) result = invoke( @@ -490,6 +564,60 @@ assert result.exit_code == 2, f"Unexpected output: {result.output}" +def test_deposit_metadata_missing( + requests_mock, mocker, sample_archive, sample_metadata, mocked_time +): + origin = compute_origin() + scenario = WebScenario() + + scenario.add_step( + "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") + ) + scenario.add_step( + "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), + ) + + # Deposit done, checker gets the SWHID + swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" + status_xml = status_template(status="done", status_detail="", swhid=swhid,) + scenario.add_step( + "get", f"{BASE_URL}/testcol/42/status/", status_xml, + ) + + # Then the checker checks the metadata appeared on the website + scenario.add_step( + "get", + f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" + f"?authority=deposit_client%20http://icinga-checker.example.org", + [{"swhid": swhid, "origin": "http://wrong-origin.example.org"}], + ) + + scenario.install_mock(requests_mock) + + result = invoke( + [ + "check-deposit", + *COMMON_OPTIONS, + "single", + "--archive", + sample_archive, + "--metadata", + sample_metadata, + ], + catch_exceptions=True, + ) + + assert result.output == ( + f"DEPOSIT CRITICAL - Deposited metadata on {swhid} with origin {origin}, " + f"missing from the list of origins: ['http://wrong-origin.example.org']\n" + "| 'load_time' = 10.00s\n" + "| 'total_time' = 20.00s\n" + "| 'upload_time' = 0.00s\n" + "| 'validation_time' = 10.00s\n" + ) + assert result.exit_code == 2, f"Unexpected output: {result.output}" + + def test_deposit_rejected( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ):