diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -500,7 +500,7 @@ - **name**: associated celery task name - **message**: relevant log message from task execution - - **duration**: task execution time (only if it succeeded) + - **delay**: task execution time (only if it succeeded) - **worker**: name of the worker that executed the task """ try: @@ -568,8 +568,8 @@ if results["hits"]["total"]["value"] >= 1: task_run_info = results["hits"]["hits"][-1]["_source"] if "swh_logging_args_runtime" in task_run_info: - duration = task_run_info["swh_logging_args_runtime"] - task_run["duration"] = duration + delay = task_run_info["swh_logging_args_runtime"] + task_run["delay"] = delay if "message" in task_run_info: task_run["message"] = task_run_info["message"] if "swh_logging_args_name" in task_run_info: @@ -622,9 +622,27 @@ ) +# Metric on the delay of save code now request per status and visit_type. This is the +# time elapsed between the submission of a save request and its actual ingestion. 
+ACCEPTED_SAVE_REQUESTS_DELAY_METRIC = "swh_web_save_requests_delay_seconds" + +_accepted_save_requests_delay_gauge = Gauge( + name=ACCEPTED_SAVE_REQUESTS_DELAY_METRIC, + documentation="Save Requests Duration", + labelnames=["load_task_status", "visit_type"], + registry=SWH_WEB_METRICS_REGISTRY, +) + + def compute_save_requests_metrics() -> None: - """Compute a couple of Prometheus metrics related to - origin save requests""" + """Compute Prometheus metrics related to origin save requests: + + - Number of submitted origin save requests + - Number of accepted origin save requests + - Save Code Now requests delay between request time and actual time of + successful/failed ingestion + + """ request_statuses = ( SAVE_REQUEST_ACCEPTED, @@ -653,6 +671,14 @@ for labels in labels_set: _accepted_save_requests_gauge.labels(*labels).set(0) + duration_load_task_statuses = ( + SAVE_TASK_FAILED, + SAVE_TASK_SUCCEEDED, + ) + + for labels in product(duration_load_task_statuses, visit_types): + _accepted_save_requests_delay_gauge.labels(*labels).set(0) + for sor in SaveOriginRequest.objects.all(): if sor.status == SAVE_REQUEST_ACCEPTED: _accepted_save_requests_gauge.labels( @@ -662,3 +688,13 @@ _submitted_save_requests_gauge.labels( status=sor.status, visit_type=sor.visit_type ).inc() + + if ( + sor.loading_task_status in (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED) + and sor.visit_date is not None + and sor.request_date is not None + ): + delay = sor.visit_date.timestamp() - sor.request_date.timestamp() + _accepted_save_requests_delay_gauge.labels( + load_task_status=sor.loading_task_status, visit_type=sor.visit_type + ).inc(delay) diff --git a/swh/web/tests/misc/test_metrics.py b/swh/web/tests/misc/test_metrics.py --- a/swh/web/tests/misc/test_metrics.py +++ b/swh/web/tests/misc/test_metrics.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this 
distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -22,6 +22,7 @@ SaveOriginRequest, ) from swh.web.common.origin_save import ( + ACCEPTED_SAVE_REQUESTS_DELAY_METRIC, ACCEPTED_SAVE_REQUESTS_METRIC, SUBMITTED_SAVE_REQUESTS_METRIC, get_savable_visit_types, @@ -97,3 +98,25 @@ ) assert_contains(resp, metric_text) + + # delay metrics + + labels_set = product(visit_types, (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED,)) + for labels in labels_set: + sors = accepted_requests.filter( + visit_type=labels[0], + loading_task_status=labels[1], + visit_date__isnull=False, + ) + + delay = 0 + for sor in sors: + delay += sor.visit_date.timestamp() - sor.request_date.timestamp() + + metric_delay_text = ( + f"{ACCEPTED_SAVE_REQUESTS_DELAY_METRIC}{{" + f'load_task_status="{labels[1]}",' + f'visit_type="{labels[0]}"}} {float(delay)}\n' + ) + + assert_contains(resp, metric_delay_text)