diff --git a/PKG-INFO b/PKG-INFO index d173898..cecf969 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,29 +1,29 @@ Metadata-Version: 2.1 Name: swh.icinga_plugins -Version: 0.4.0 +Version: 0.4.1 Summary: Icinga plugins for Software Heritage infrastructure monitoring Home-page: https://forge.softwareheritage.org/diffusion/swh-icinga-plugins Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-icinga-plugins Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-icinga-plugins ================== Scripts for end-to-end monitoring of the SWH infrastructure diff --git a/swh.icinga_plugins.egg-info/PKG-INFO b/swh.icinga_plugins.egg-info/PKG-INFO index 81453f7..eb43cc8 100644 --- a/swh.icinga_plugins.egg-info/PKG-INFO +++ b/swh.icinga_plugins.egg-info/PKG-INFO @@ -1,29 +1,29 @@ Metadata-Version: 2.1 Name: swh.icinga-plugins -Version: 0.4.0 +Version: 0.4.1 Summary: Icinga plugins for Software Heritage infrastructure monitoring Home-page: https://forge.softwareheritage.org/diffusion/swh-icinga-plugins Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-icinga-plugins Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-icinga-plugins ================== Scripts for end-to-end monitoring of the SWH infrastructure diff --git a/swh/icinga_plugins/deposit.py b/swh/icinga_plugins/deposit.py index 17c5a57..fad334a 100644 --- a/swh/icinga_plugins/deposit.py +++ b/swh/icinga_plugins/deposit.py @@ -1,255 +1,252 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import sys import time from typing import Any, Dict, Optional import requests from swh.deposit.client import PublicApiDepositClient from .base_check import BaseCheck class DepositCheck(BaseCheck): TYPE = "DEPOSIT" DEFAULT_WARNING_THRESHOLD = 120 DEFAULT_CRITICAL_THRESHOLD = 3600 def __init__(self, obj): super().__init__(obj) self.api_url = obj["swh_web_url"].rstrip("/") self._poll_interval = obj["poll_interval"] self._archive_path = obj["archive"] self._metadata_path = obj["metadata"] self._collection = obj["collection"] self._slug: Optional[str] = None self._provider_url = obj["provider_url"] self._client = PublicApiDepositClient( { "url": obj["server"], "auth": {"username": obj["username"], "password": obj["password"]}, } ) def upload_deposit(self): slug = ( "check-deposit-%s" % datetime.datetime.fromtimestamp(time.time()).isoformat() ) result = self._client.deposit_create( archive=self._archive_path, metadata=self._metadata_path, collection=self._collection, in_progress=False, slug=slug, ) self._slug = slug self._deposit_id = result["deposit_id"] return result def update_deposit_with_metadata(self) -> Dict[str, Any]: """Trigger a metadata update on the deposit once it's completed. """ deposit = self.get_deposit_status() swhid = deposit["deposit_swh_id"] assert deposit["deposit_id"] == self._deposit_id # We can reuse the initial metadata file we already sent return self._client.deposit_update( self._collection, self._deposit_id, self._slug, metadata=self._metadata_path, swhid=swhid, ) def get_deposit_status(self): return self._client.deposit_status( collection=self._collection, deposit_id=self._deposit_id ) def wait_while_status(self, statuses, start_time, metrics, result): while result["deposit_status"] in statuses: metrics["total_time"] = time.time() - start_time if metrics["total_time"] > self.critical_threshold: self.print_result( "CRITICAL", f"Timed out while in status " f'{result["deposit_status"]} ' f'({metrics["total_time"]}s seconds since deposit ' f"started)", **metrics, ) sys.exit(2) time.sleep(self._poll_interval) result = self.get_deposit_status() return result def main(self): start_time = time.time() start_datetime = datetime.datetime.fromtimestamp( start_time, tz=datetime.timezone.utc ) metrics = {} # Upload the archive and metadata result = self.upload_deposit() metrics["upload_time"] = time.time() - start_time # Wait for validation result = self.wait_while_status(["deposited"], start_time, metrics, result) metrics["total_time"] = time.time() - start_time metrics["validation_time"] = metrics["total_time"] - metrics["upload_time"] # Check validation succeeded if result["deposit_status"] == "rejected": self.print_result( "CRITICAL", f'Deposit was rejected: {result["deposit_status_detail"]}', **metrics, ) return 2 # Wait for loading result = self.wait_while_status( ["verified", "loading"], start_time, metrics, result ) metrics["total_time"] = time.time() - start_time metrics["load_time"] = ( metrics["total_time"] - metrics["upload_time"] - metrics["validation_time"] ) # Check loading succeeded if result["deposit_status"] == "failed": self.print_result( "CRITICAL", f'Deposit loading failed: {result["deposit_status_detail"]}', **metrics, ) return 2 # Check for unexpected status if result["deposit_status"] != "done": self.print_result( "CRITICAL", f'Deposit got unexpected status: {result["deposit_status"]} ' f'({result["deposit_status_detail"]})', **metrics, ) return 2 # Get the SWHID if "deposit_swh_id" not in result: # if the deposit succeeded immediately (which is rare), it does not # contain the SWHID, so we need to re-fetch its status. result = self.get_deposit_status() if result.get("deposit_swh_id") is None: self.print_result( "CRITICAL", f"'deposit_swh_id' missing from result: {result!r}", **metrics, ) return 2 swhid = result["deposit_swh_id"] # Check for unexpected status if result["deposit_status"] != "done": self.print_result( "CRITICAL", f'Deposit status went from "done" to: {result["deposit_status"]} ' f'({result["deposit_status_detail"]})', **metrics, ) return 2 # Get metadata list from swh-web metadata_objects = requests.get( f"{self.api_url}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" f"?authority=deposit_client%20{self._provider_url}" + f"&after={start_datetime.isoformat()}" ).json() expected_origin = f"{self._provider_url}/{self._slug}" - # Filter out objects that were clearly not created by this deposit (ie. created - # before the deposit started, or that are from unrelated origins) + # Filter out objects that were clearly not created by this deposit relevant_metadata_objects = [ - d - for d in metadata_objects - if d.get("origin") == expected_origin - and datetime.datetime.fromisoformat(d["discovery_date"]) >= start_datetime + d for d in metadata_objects if d.get("origin") == expected_origin ] if not relevant_metadata_objects: self.print_result( "CRITICAL", f"No recent metadata on {swhid} with origin {expected_origin} in: " f"{metadata_objects!r}", **metrics, ) return 2 # Check the metadata was loaded as-is metadata_url = relevant_metadata_objects[0]["metadata_url"] metadata_file = requests.get(metadata_url).content with open(self._metadata_path, "rb") as fd: expected_metadata_file = fd.read() if metadata_file != expected_metadata_file: self.print_result( "CRITICAL", f"Metadata on {swhid} with origin {expected_origin} " f"(at {metadata_url}) differs from uploaded Atom document " f"(at {self._metadata_path})", **metrics, ) return 2 # Everything went fine, check total time wasn't too large and # print result (status_code, status) = self.get_status(metrics["total_time"]) self.print_result( status, f'Deposit took {metrics["total_time"]:.2f}s and succeeded.', **metrics, ) if status_code != 0: # Stop if any problem in the initial scenario return status_code # Initial deposit is now completed, now we can update the deposit with metadata result = self.update_deposit_with_metadata() total_time = time.time() - start_time metrics_update = { "total_time": total_time, "update_time": ( total_time - metrics["upload_time"] - metrics["validation_time"] - metrics["load_time"] ), } if "error" in result: self.print_result( "CRITICAL", f'Deposit Metadata update failed: {result["error"]} ', **metrics_update, ) return 2 (status_code, status) = self.get_status(metrics_update["total_time"]) self.print_result( status, f'Deposit Metadata update took {metrics_update["update_time"]:.2f}s ' "and succeeded.", **metrics_update, ) return status_code diff --git a/swh/icinga_plugins/tests/conftest.py b/swh/icinga_plugins/tests/conftest.py index 5aa807a..ca98ab8 100644 --- a/swh/icinga_plugins/tests/conftest.py +++ b/swh/icinga_plugins/tests/conftest.py @@ -1,25 +1,23 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import time - import pytest @pytest.fixture def mocked_time(mocker): - start_time = time.time() + start_time = 1646413359.0 # 2022-03-04-17:02:39Z time_offset = 0 def fake_sleep(seconds): nonlocal time_offset time_offset += seconds def fake_time(): return start_time + time_offset mocker.patch("time.sleep", side_effect=fake_sleep) mocker.patch("time.time", side_effect=fake_time) diff --git a/swh/icinga_plugins/tests/test_deposit.py b/swh/icinga_plugins/tests/test_deposit.py index f25c0d2..1aaacae 100644 --- a/swh/icinga_plugins/tests/test_deposit.py +++ b/swh/icinga_plugins/tests/test_deposit.py @@ -1,870 +1,870 @@ # Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import io import os import tarfile import time from typing import Optional import pytest from swh.icinga_plugins.tests.utils import invoke from .web_scenario import WebScenario POLL_INTERVAL = 10 BASE_URL = "http://swh-deposit.example.org/1" BASE_WEB_URL = "mock://swh-web.example.org" PROVIDER_URL = "http://icinga-checker.example.org" COMMON_OPTIONS = [ "--server", BASE_URL, "--username", "test", "--password", "test", "--collection", "testcol", "--swh-web-url", BASE_WEB_URL, "--provider-url", PROVIDER_URL, ] SAMPLE_METADATA = """ Test Software swh test-software No One """ ENTRY_TEMPLATE = """ 42 2019-12-19 18:11:00 foo.tar.gz {status} http://purl.org/net/sword/package/SimpleZip """ STATUS_TEMPLATE = """ 42 {status} {status_detail}%s """ def compute_origin(): # This is the same origin the checker would compute, because we mock time.time # to be constant until time.sleep is called return ( PROVIDER_URL + "/check-deposit-%s" % datetime.datetime.fromtimestamp(time.time()).isoformat() ) def status_template( status: str, status_detail: str = "", swhid: Optional[str] = None ) -> str: """Generate a proper status template out of status, status_detail and optional swhid """ if swhid is not None: template = ( STATUS_TEMPLATE % f"\n {swhid}" ) return template.format(status=status, status_detail=status_detail, swhid=swhid) template = STATUS_TEMPLATE % "" return template.format(status=status, status_detail=status_detail) def test_status_template(): actual_status = status_template(status="deposited") assert ( actual_status == """ 42 deposited """ ) actual_status = status_template(status="verified", status_detail="detail") assert ( actual_status == """ 42 verified detail """ ) actual_status = status_template( status="done", swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" ) assert ( actual_status == """ 42 done swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74 """ ) @pytest.fixture(scope="session") def tmp_path(tmp_path_factory): return tmp_path_factory.mktemp(__name__) @pytest.fixture(scope="session") def sample_metadata(tmp_path): """Returns a sample metadata file's path """ path = os.path.join(tmp_path, "metadata.xml") with open(path, "w") as fd: fd.write(SAMPLE_METADATA) return path @pytest.fixture(scope="session") def sample_archive(tmp_path): """Returns a sample archive's path """ path = os.path.join(tmp_path, "archive.tar.gz") with tarfile.open(path, "w:gz") as tf: tf.addfile(tarfile.TarInfo("hello.py"), io.BytesIO(b'print("Hello world")')) return path def test_deposit_immediate_success( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): """Both deposit creation and deposit metadata update passed without delays """ origin = compute_origin() scenario = WebScenario() status_xml = status_template( status="done", status_detail="", swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74", ) # Initial deposit scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="done"), ) # Checker gets the SWHID swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # Then the checker checks the metadata appeared on the website scenario.add_step( "get", f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" - f"?authority=deposit_client%20http://icinga-checker.example.org", + f"?authority=deposit_client%20http://icinga-checker.example.org" + f"&after=2022-03-04T17:02:39+00:00", [ { "swhid": swhid, "origin": origin, "discovery_date": "2999-03-03T10:48:47+00:00", "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", } ], ) scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA) # Then metadata update scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # internal deposit client does call status, then update metadata then status api scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_xml, ) scenario.add_step( "put", f"{BASE_URL}/testcol/42/atom/", status_xml, ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_xml, ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ] ) assert result.output == ( "DEPOSIT OK - Deposit took 0.00s and succeeded.\n" "| 'load_time' = 0.00s\n" "| 'total_time' = 0.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 0.00s\n" "DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n" "| 'total_time' = 0.00s\n" "| 'update_time' = 0.00s\n" ) assert result.exit_code == 0, f"Unexpected output: {result.output}" def test_deposit_delays( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): """Deposit creation passed with some delays, deposit metadata update passed without delay """ origin = compute_origin() scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"), ) # Deposit done, checker gets the SWHID swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # Then the checker checks the metadata appeared on the website scenario.add_step( "get", f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" - f"?authority=deposit_client%20http://icinga-checker.example.org", + f"?authority=deposit_client%20http://icinga-checker.example.org" + f"&after=2022-03-04T17:02:39+00:00", [ { "swhid": swhid, "origin": origin, "discovery_date": "2999-03-03T10:48:47+00:00", "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", } ], ) scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA) # Then metadata update scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # internal deposit client does call status, then update metadata then status api scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_xml, ) scenario.add_step( "put", f"{BASE_URL}/testcol/42/atom/", status_xml, ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_xml, ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ] ) assert result.output == ( "DEPOSIT OK - Deposit took 30.00s and succeeded.\n" "| 'load_time' = 20.00s\n" "| 'total_time' = 30.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" "DEPOSIT OK - Deposit Metadata update took 0.00s and succeeded.\n" "| 'total_time' = 30.00s\n" "| 'update_time' = 0.00s\n" ) assert result.exit_code == 0, f"Unexpected output: {result.output}" def test_deposit_then_metadata_update_failed( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): """Deposit creation passed, deposit metadata update failed """ origin = compute_origin() scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"), ) # Deposit done, checker gets the SWHID swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # Then the checker checks the metadata appeared on the website scenario.add_step( "get", f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" - f"?authority=deposit_client%20http://icinga-checker.example.org", + f"?authority=deposit_client%20http://icinga-checker.example.org" + f"&after=2022-03-04T17:02:39+00:00", [ { "swhid": swhid, "origin": origin, "discovery_date": "2999-03-03T10:48:47+00:00", "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", } ], ) scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA) # Then metadata update calls failed_status_xml = status_template( status="failed", # lying here status_detail="Failure to ingest", swhid="swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74", ) scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml) scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", failed_status_xml) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( "DEPOSIT OK - Deposit took 30.00s and succeeded.\n" "| 'load_time' = 20.00s\n" "| 'total_time' = 30.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" "DEPOSIT CRITICAL - Deposit Metadata update failed: You can only update " "metadata on deposit with status 'done' \n" "| 'total_time' = 30.00s\n" "| 'update_time' = 0.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" def test_deposit_delay_warning( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): """Deposit creation exceeded delays, no deposit update occurred. """ origin = compute_origin() scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) # Deposit done, checker gets the SWHID swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step("get", f"{BASE_URL}/testcol/42/status/", status_xml) # Then the checker checks the metadata appeared on the website scenario.add_step( "get", f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" - f"?authority=deposit_client%20http://icinga-checker.example.org", + f"?authority=deposit_client%20http://icinga-checker.example.org" + f"&after=2022-03-04T17:02:39+00:00", [ { "swhid": swhid, "origin": origin, "discovery_date": "2999-03-03T10:48:47+00:00", "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", } ], ) scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA) scenario.install_mock(requests_mock) result = invoke( [ "--warning", "15", "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( "DEPOSIT WARNING - Deposit took 20.00s and succeeded.\n" "| 'load_time' = 10.00s\n" "| 'total_time' = 20.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" ) assert result.exit_code == 1, f"Unexpected output: {result.output}" def test_deposit_delay_critical( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): origin = compute_origin() scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) # Deposit done, checker gets the SWHID swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_xml, callback=lambda: time.sleep(60), ) # Then the checker checks the metadata appeared on the website scenario.add_step( "get", f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" - f"?authority=deposit_client%20http://icinga-checker.example.org", + f"?authority=deposit_client%20http://icinga-checker.example.org" + f"&after=2022-03-04T17:02:39+00:00", [ { "swhid": swhid, "origin": origin, "discovery_date": "2999-03-03T10:48:47+00:00", "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", } ], ) scenario.add_step("get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA) scenario.install_mock(requests_mock) result = invoke( [ "--critical", "50", "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( "DEPOSIT CRITICAL - Deposit took 80.00s and succeeded.\n" "| 'load_time' = 70.00s\n" "| 'total_time' = 80.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" def test_deposit_timeout( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited"), callback=lambda: time.sleep(1500), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), callback=lambda: time.sleep(1500), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"), callback=lambda: time.sleep(1500), ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( "DEPOSIT CRITICAL - Timed out while in status loading " "(4520.0s seconds since deposit started)\n" "| 'total_time' = 4520.00s\n" "| 'upload_time' = 1500.00s\n" "| 'validation_time' = 1510.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" def test_deposit_metadata_missing( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): origin = compute_origin() scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) # Deposit done, checker gets the SWHID swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_xml, ) # Then the checker checks the metadata appeared on the website metadata_list = [ { # Filtered out, because wrong origin "swhid": swhid, "origin": "http://wrong-origin.example.org", "discovery_date": "2999-03-03T10:48:47+00:00", "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", }, - { - # Filtered out, because too old - "swhid": swhid, - "origin": origin, - "discovery_date": "2022-03-03T09:48:47+00:00", - "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", - }, ] scenario.add_step( "get", f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" - f"?authority=deposit_client%20http://icinga-checker.example.org", + f"?authority=deposit_client%20http://icinga-checker.example.org" + f"&after=2022-03-04T17:02:39+00:00", metadata_list, ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( f"DEPOSIT CRITICAL - No recent metadata on {swhid} with origin {origin} in: " f"{metadata_list!r}\n" "| 'load_time' = 10.00s\n" "| 'total_time' = 20.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" def test_deposit_metadata_corrupt( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): origin = compute_origin() scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) # Deposit done, checker gets the SWHID swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" status_xml = status_template(status="done", status_detail="", swhid=swhid,) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_xml, ) # Then the checker checks the metadata appeared on the website metadata_list = [ { "swhid": swhid, "origin": origin, "discovery_date": "2999-03-03T09:48:47+00:00", "metadata_url": f"{BASE_WEB_URL}/the-metadata-url", }, ] scenario.add_step( "get", f"{BASE_WEB_URL}/api/1/raw-extrinsic-metadata/swhid/{swhid}/" - f"?authority=deposit_client%20http://icinga-checker.example.org", + f"?authority=deposit_client%20http://icinga-checker.example.org" + f"&after=2022-03-04T17:02:39+00:00", metadata_list, ) scenario.add_step( "get", f"{BASE_WEB_URL}/the-metadata-url", SAMPLE_METADATA[0:-1], # corrupting the metadata by dropping the last byte ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( f"DEPOSIT CRITICAL - Metadata on {swhid} with origin {origin} (at " f"{BASE_WEB_URL}/the-metadata-url) differs from uploaded Atom document (at " f"{sample_metadata})\n" "| 'load_time' = 10.00s\n" "| 'total_time' = 20.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" def test_deposit_rejected( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="rejected", status_detail="booo"), ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( "DEPOSIT CRITICAL - Deposit was rejected: booo\n" "| 'total_time' = 10.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" def test_deposit_failed( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="failed", status_detail="booo"), ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( "DEPOSIT CRITICAL - Deposit loading failed: booo\n" "| 'load_time' = 20.00s\n" "| 'total_time' = 30.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" def test_deposit_unexpected_status( requests_mock, mocker, sample_archive, sample_metadata, mocked_time ): scenario = WebScenario() scenario.add_step( "post", f"{BASE_URL}/testcol/", ENTRY_TEMPLATE.format(status="deposited") ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="verified"), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="loading"), ) scenario.add_step( "get", f"{BASE_URL}/testcol/42/status/", status_template(status="what", status_detail="booo"), ) scenario.install_mock(requests_mock) result = invoke( [ "check-deposit", *COMMON_OPTIONS, "single", "--archive", sample_archive, "--metadata", sample_metadata, ], catch_exceptions=True, ) assert result.output == ( "DEPOSIT CRITICAL - Deposit got unexpected status: what (booo)\n" "| 'load_time' = 20.00s\n" "| 'total_time' = 30.00s\n" "| 'upload_time' = 0.00s\n" "| 'validation_time' = 10.00s\n" ) assert result.exit_code == 2, f"Unexpected output: {result.output}" diff --git a/swh/icinga_plugins/tests/test_vault.py b/swh/icinga_plugins/tests/test_vault.py index 04cece2..e421a90 100644 --- a/swh/icinga_plugins/tests/test_vault.py +++ b/swh/icinga_plugins/tests/test_vault.py @@ -1,489 +1,530 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import io import tarfile import time from swh.icinga_plugins.tests.utils import invoke from .web_scenario import WebScenario DIR_ID = "ab" * 20 url_api = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/" url_fetch = f"mock://swh-web.example.org/api/1/vault/directory/{DIR_ID}/raw/" def _make_tarfile(): fd = io.BytesIO() with tarfile.open(fileobj=fd, mode="w:gz") as tf: tf.addfile(tarfile.TarInfo(f"swh:1:dir:{DIR_ID}/README"), b"this is a readme\n") tarinfo = tarfile.TarInfo(f"swh:1:dir:{DIR_ID}") tarinfo.type = tarfile.DIRTYPE tf.addfile(tarinfo) return fd.getvalue() TARBALL = _make_tarfile() response_pending = { "obj_id": DIR_ID, "obj_type": "directory", "progress_message": "foo", "status": "pending", } response_done = { "fetch_url": url_fetch, "id": 9, "obj_id": DIR_ID, "obj_type": "directory", "status": "done", } response_done_no_fetch = { "id": 9, "obj_id": DIR_ID, "obj_type": "directory", "status": "done", } response_failed = { "obj_id": DIR_ID, "obj_type": "directory", "progress_message": "foobar", "status": "failed", } response_unknown_status = { "obj_id": DIR_ID, "obj_type": "directory", "progress_message": "what", "status": "boo", } class FakeStorage: def __init__(self, foo, **kwargs): pass def directory_get_random(self): return bytes.fromhex(DIR_ID) def test_vault_immediate_success(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_done) scenario.add_step( "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"} ) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ] ) assert result.output == ( f"VAULT OK - cooking directory {DIR_ID} took " f"10.00s and succeeded.\n" f"| 'total_time' = 10.00s\n" ) assert result.exit_code == 0, result.output def test_vault_delayed_success(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_pending) scenario.add_step("get", url_api, response_done) scenario.add_step( "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"} ) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ] ) assert result.output == ( f"VAULT OK - cooking directory {DIR_ID} took " f"20.00s and succeeded.\n" f"| 'total_time' = 20.00s\n" ) assert result.exit_code == 0, result.output def test_vault_failure(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_failed) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( f"VAULT CRITICAL - cooking directory {DIR_ID} took " f"10.00s and failed with: foobar\n" f"| 'total_time' = 10.00s\n" ) assert result.exit_code == 2, result.output def test_vault_unknown_status(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_unknown_status) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( f"VAULT CRITICAL - cooking directory {DIR_ID} took " f"10.00s and resulted in unknown status: boo\n" f"| 'total_time' = 10.00s\n" ) assert result.exit_code == 2, result.output def test_vault_timeout(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_pending) scenario.add_step( "get", url_api, response_pending, callback=lambda: time.sleep(4000) ) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( f"VAULT CRITICAL - cooking directory {DIR_ID} took more than " f"4020.00s and has status: foo\n" f"| 'total_time' = 4020.00s\n" ) assert result.exit_code == 2, result.output def test_vault_cached_directory(requests_mock, mocker, mocked_time): """First serves a directory that's already in the cache, to test that vault_check requests another one.""" scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=200) scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_done) scenario.add_step( "get", url_fetch, TARBALL, headers={"Content-Type": "application/gzip"} ) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ] ) assert result.output == ( f"VAULT OK - cooking directory {DIR_ID} took " f"10.00s and succeeded.\n" f"| 'total_time' = 10.00s\n" ) assert result.exit_code == 0, result.output def test_vault_no_directory(requests_mock, mocker, mocked_time): """Tests with an empty storage""" scenario = WebScenario() scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage mocker.patch(f"{__name__}.FakeStorage.directory_get_random", return_value=None) result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ("VAULT CRITICAL - No directory exists in the archive.\n") assert result.exit_code == 2, result.output def test_vault_fetch_failed(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_done) scenario.add_step( "get", url_fetch, "", status_code=500, headers={"Content-Type": "application/gzip"}, ) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( f"VAULT CRITICAL - cooking directory {DIR_ID} took " f"10.00s and succeeded, but fetch failed with status code 500.\n" f"| 'total_time' = 10.00s\n" ) assert result.exit_code == 2, result.output def test_vault_fetch_missing_content_type(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_done) scenario.add_step("get", url_fetch, "") scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( "VAULT CRITICAL - Unexpected Content-Type when downloading bundle: None\n" "| 'total_time' = 10.00s\n" ) assert result.exit_code == 2, result.output def test_vault_corrupt_tarball_gzip(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_pending) scenario.add_step("get", url_api, response_done) scenario.add_step( "get", url_fetch, b"this-is-not-a-tarball", - headers={"Content-Type": "application/gzip", "Content-Length": "100000"}, + headers={"Content-Type": "application/gzip"}, ) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( - "VAULT CRITICAL - Error while reading tarball: not a gzip file\n" + "VAULT CRITICAL - ReadError while reading tarball: not a gzip file\n" "| 'total_time' = 20.00s\n" ) assert result.exit_code == 2, result.output def test_vault_corrupt_tarball_member(requests_mock, mocker, mocked_time): fd = io.BytesIO() with tarfile.open(fileobj=fd, mode="w:gz") as tf: tf.addfile(tarfile.TarInfo("wrong_dir_name/README"), b"this is a readme\n") tarball = fd.getvalue() scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_pending) scenario.add_step("get", url_api, response_done) scenario.add_step( - "get", - url_fetch, - tarball, - headers={"Content-Type": "application/gzip", "Content-Length": "100000"}, + "get", url_fetch, tarball, headers={"Content-Type": "application/gzip"}, ) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( "VAULT CRITICAL - Unexpected member in tarball: wrong_dir_name/README\n" "| 'total_time' = 20.00s\n" ) assert result.exit_code == 2, result.output +def test_vault_empty_tarball(requests_mock, mocker, mocked_time): + fd = io.BytesIO() + with tarfile.open(fileobj=fd, mode="w:gz"): + pass + tarball = fd.getvalue() + print(tarball) + + scenario = WebScenario() + + scenario.add_step("get", url_api, {}, status_code=404) + scenario.add_step("post", url_api, response_pending) + scenario.add_step("get", url_api, response_pending) + scenario.add_step("get", url_api, response_done) + scenario.add_step( + "get", url_fetch, tarball, headers={"Content-Type": "application/gzip"}, + ) + + scenario.install_mock(requests_mock) + + get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") + get_storage_mock.side_effect = FakeStorage + + result = invoke( + [ + "check-vault", + "--swh-web-url", + "mock://swh-web.example.org", + "--swh-storage-url", + "foo://example.org", + "directory", + ], + catch_exceptions=True, + ) + + # This error message will need to be updated when https://bugs.python.org/issue46922 + # is resolved. + assert result.output == ( + "VAULT CRITICAL - StreamError while reading tarball (empty file?): " + "seeking backwards is not allowed\n" + "| 'total_time' = 20.00s\n" + ) + assert result.exit_code == 2, result.output + + def test_vault_no_fetch_url(requests_mock, mocker, mocked_time): scenario = WebScenario() scenario.add_step("get", url_api, {}, status_code=404) scenario.add_step("post", url_api, response_pending) scenario.add_step("get", url_api, response_done_no_fetch) scenario.install_mock(requests_mock) get_storage_mock = mocker.patch("swh.icinga_plugins.vault.get_storage") get_storage_mock.side_effect = FakeStorage result = invoke( [ "check-vault", "--swh-web-url", "mock://swh-web.example.org", "--swh-storage-url", "foo://example.org", "directory", ], catch_exceptions=True, ) assert result.output == ( f"VAULT CRITICAL - cooking directory {DIR_ID} took 10.00s and succeeded, " f"but API response did not contain a fetch_url.\n" f"| 'total_time' = 10.00s\n" ) assert result.exit_code == 2, result.output diff --git a/swh/icinga_plugins/vault.py b/swh/icinga_plugins/vault.py index 3db33cc..25d8693 100644 --- a/swh/icinga_plugins/vault.py +++ b/swh/icinga_plugins/vault.py @@ -1,158 +1,174 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tarfile import time import requests from swh.storage import get_storage from .base_check import BaseCheck class NoDirectory(Exception): pass class VaultCheck(BaseCheck): TYPE = "VAULT" DEFAULT_WARNING_THRESHOLD = 0 DEFAULT_CRITICAL_THRESHOLD = 3600 def __init__(self, obj): super().__init__(obj) self._swh_storage = get_storage("remote", url=obj["swh_storage_url"]) self._swh_web_url = obj["swh_web_url"] self._poll_interval = obj["poll_interval"] def _url_for_dir(self, dir_id): return self._swh_web_url + f"/api/1/vault/directory/{dir_id.hex()}/" def _pick_directory(self): dir_ = self._swh_storage.directory_get_random() if dir_ is None: raise NoDirectory() return dir_ def _pick_uncached_directory(self): while True: dir_id = self._pick_directory() response = requests.get(self._url_for_dir(dir_id)) if response.status_code == 404: return dir_id def main(self): try: dir_id = self._pick_uncached_directory() except NoDirectory: self.print_result("CRITICAL", "No directory exists in the archive.") return 2 start_time = time.time() total_time = 0 response = requests.post(self._url_for_dir(dir_id)) assert response.status_code == 200, (response, response.text) result = response.json() while result["status"] in ("new", "pending"): time.sleep(self._poll_interval) response = requests.get(self._url_for_dir(dir_id)) assert response.status_code == 200, (response, response.text) result = response.json() total_time = time.time() - start_time if total_time > self.critical_threshold: self.print_result( "CRITICAL", f"cooking directory {dir_id.hex()} took more than " f"{total_time:.2f}s and has status: " f'{result["progress_message"]}', total_time=total_time, ) return 2 if result["status"] == "failed": self.print_result( "CRITICAL", f"cooking directory {dir_id.hex()} took {total_time:.2f}s " f'and failed with: {result["progress_message"]}', total_time=total_time, ) return 2 elif result["status"] != "done": self.print_result( "CRITICAL", f"cooking directory {dir_id.hex()} took {total_time:.2f}s " f'and resulted in unknown status: {result["status"]}', total_time=total_time, ) return 2 (status_code, status) = self.get_status(total_time) if "fetch_url" not in result: self.print_result( "CRITICAL", f"cooking directory {dir_id.hex()} took {total_time:.2f}s " f"and succeeded, but API response did not contain a fetch_url.", total_time=total_time, ) return 2 with requests.get(result["fetch_url"], stream=True) as fetch_response: try: fetch_response.raise_for_status() except requests.HTTPError: self.print_result( "CRITICAL", f"cooking directory {dir_id.hex()} took {total_time:.2f}s " f"and succeeded, but fetch failed with status code " f"{fetch_response.status_code}.", total_time=total_time, ) return 2 content_type = fetch_response.headers.get("Content-Type") if content_type != "application/gzip": self.print_result( "CRITICAL", f"Unexpected Content-Type when downloading bundle: {content_type}", total_time=total_time, ) return 2 try: - with tarfile.open(fileobj=fetch_response.raw, mode="r:gz") as tf: + with tarfile.open(fileobj=fetch_response.raw, mode="r|gz") as tf: # Note that we are streaming the tarfile from the network, # so we are allowed at most one pass on the tf object; # and the sooner we close it the better. # Fortunately, checking only the first member is good enough: tarinfo = tf.next() swhid = f"swh:1:dir:{dir_id.hex()}" if tarinfo.name != swhid and not tarinfo.name.startswith( f"{swhid}/" ): self.print_result( "CRITICAL", f"Unexpected member in tarball: {tarinfo.name}", total_time=total_time, ) return 2 except tarfile.ReadError as e: self.print_result( "CRITICAL", - f"Error while reading tarball: {e}", + f"ReadError while reading tarball: {e}", + total_time=total_time, + ) + return 2 + except tarfile.StreamError as e: + if e.args[0] == "seeking backwards is not allowed": + # Probably https://bugs.python.org/issue46922 + self.print_result( + "CRITICAL", + f"StreamError while reading tarball (empty file?): {e}", + total_time=total_time, + ) + return 2 + + self.print_result( + "CRITICAL", + f"StreamError while reading tarball: {e}", total_time=total_time, ) return 2 self.print_result( status, f"cooking directory {dir_id.hex()} took {total_time:.2f}s " f"and succeeded.", total_time=total_time, ) return status_code