# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from itertools import product

from prometheus_client import Gauge
from prometheus_client.registry import CollectorRegistry

from swh.web.save_code_now.models import (
    SAVE_REQUEST_ACCEPTED,
    SAVE_REQUEST_PENDING,
    SAVE_REQUEST_REJECTED,
    SAVE_TASK_FAILED,
    SAVE_TASK_NOT_CREATED,
    SAVE_TASK_NOT_YET_SCHEDULED,
    SAVE_TASK_RUNNING,
    SAVE_TASK_SCHEDULED,
    SAVE_TASK_SUCCEEDED,
    SaveOriginRequest,
)
from swh.web.save_code_now.origin_save import get_savable_visit_types
from swh.web.save_origin_webhooks.generic_receiver import SUPPORTED_FORGE_TYPES

# Registry collecting every gauge declared in this module.
SWH_WEB_METRICS_REGISTRY = CollectorRegistry(auto_describe=True)

# Names under which the save request metrics are exposed.
SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests"
SUBMITTED_SAVE_REQUESTS_FROM_WEBHOOKS_METRIC = (
    "swh_web_submitted_save_requests_from_webhooks"
)
ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests"

# Submitted save requests, broken down by request status and visit type.
_submitted_save_requests_gauge = Gauge(
    SUBMITTED_SAVE_REQUESTS_METRIC,
    "Number of submitted origin save requests",
    labelnames=["status", "visit_type"],
    registry=SWH_WEB_METRICS_REGISTRY,
)

# Save requests submitted by a forge webhook receiver, broken down by request
# status and originating forge type.
_submitted_save_requests_from_webhooks_gauge = Gauge(
    SUBMITTED_SAVE_REQUESTS_FROM_WEBHOOKS_METRIC,
    "Number of submitted origin save requests through forge webhook receivers",
    labelnames=["status", "webhook_origin"],
    registry=SWH_WEB_METRICS_REGISTRY,
)

# Accepted save requests, broken down by loading task status and visit type.
_accepted_save_requests_gauge = Gauge(
    ACCEPTED_SAVE_REQUESTS_METRIC,
    "Number of accepted origin save requests",
    labelnames=["load_task_status", "visit_type"],
    registry=SWH_WEB_METRICS_REGISTRY,
)
# Metric on the delay of save code now requests per load task status and
# visit_type. This is the time difference between the moment a save code now
# request is submitted and the moment its origin got visited.
ACCEPTED_SAVE_REQUESTS_DELAY_METRIC = "swh_web_save_requests_delay_seconds"

_accepted_save_requests_delay_gauge = Gauge(
    name=ACCEPTED_SAVE_REQUESTS_DELAY_METRIC,
    documentation="Save Requests Duration",
    labelnames=["load_task_status", "visit_type"],
    registry=SWH_WEB_METRICS_REGISTRY,
)


def compute_save_requests_metrics() -> None:
    """Compute Prometheus metrics related to origin save requests:

    - Number of submitted origin save requests
    - Number of submitted origin save requests through forge webhook
      receivers
    - Number of accepted origin save requests
    - Save Code Now requests delay between request time and actual time
      of ingestion

    Every gauge is first reset to 0 for all its known label combinations,
    then all save requests stored in database are walked once to increment
    the matching gauges.
    """

    request_statuses = (
        SAVE_REQUEST_ACCEPTED,
        SAVE_REQUEST_REJECTED,
        SAVE_REQUEST_PENDING,
    )

    load_task_statuses = (
        SAVE_TASK_NOT_CREATED,
        SAVE_TASK_NOT_YET_SCHEDULED,
        SAVE_TASK_SCHEDULED,
        SAVE_TASK_SUCCEEDED,
        SAVE_TASK_FAILED,
        SAVE_TASK_RUNNING,
    )

    # Only these load task statuses feed the delay metric; the tuple is used
    # both to reset the delay gauge and to select requests below.
    duration_load_task_statuses = (
        SAVE_TASK_FAILED,
        SAVE_TASK_SUCCEEDED,
    )

    # for metrics, we want access to all visit types
    visit_types = get_savable_visit_types(privileged_user=True)

    # Each entry gives a gauge and the values of its two labels, in the order
    # the labels were declared on the gauge.
    gauges_label_values = (
        (_submitted_save_requests_gauge, request_statuses, visit_types),
        (
            _submitted_save_requests_from_webhooks_gauge,
            request_statuses,
            SUPPORTED_FORGE_TYPES,
        ),
        (_accepted_save_requests_gauge, load_task_statuses, visit_types),
        (
            _accepted_save_requests_delay_gauge,
            duration_load_task_statuses,
            visit_types,
        ),
    )

    # The gauges are module-level objects that are only ever incremented
    # below, so they must be reset before each computation to not accumulate
    # the values of a previous call.
    for gauge, first_label_values, second_label_values in gauges_label_values:
        for labels in product(first_label_values, second_label_values):
            gauge.labels(*labels).set(0)

    for sor in SaveOriginRequest.objects.all():
        if sor.status == SAVE_REQUEST_ACCEPTED:
            _accepted_save_requests_gauge.labels(
                load_task_status=sor.loading_task_status,
                visit_type=sor.visit_type,
            ).inc()

        _submitted_save_requests_gauge.labels(
            status=sor.status, visit_type=sor.visit_type
        ).inc()

        if sor.from_webhook:
            # NOTE(review): assumes webhook_origin is always set when
            # from_webhook is true; to be confirmed against the model.
            _submitted_save_requests_from_webhooks_gauge.labels(
                status=sor.status,
                webhook_origin=sor.webhook_origin,
            ).inc()

        if (
            sor.loading_task_status in duration_load_task_statuses
            and sor.visit_date is not None
            and sor.request_date is not None
        ):
            # The gauge holds the sum of the delays (in seconds) of all
            # requests sharing the same labels.
            delay = sor.visit_date.timestamp() - sor.request_date.timestamp()
            _accepted_save_requests_delay_gauge.labels(
                load_task_status=sor.loading_task_status,
                visit_type=sor.visit_type,
            ).inc(delay)
# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from datetime import timedelta
from itertools import product
import random

from prometheus_client.exposition import CONTENT_TYPE_LATEST
import pytest

from swh.web.metrics.prometheus import (
    ACCEPTED_SAVE_REQUESTS_DELAY_METRIC,
    ACCEPTED_SAVE_REQUESTS_METRIC,
    SUBMITTED_SAVE_REQUESTS_FROM_WEBHOOKS_METRIC,
    SUBMITTED_SAVE_REQUESTS_METRIC,
    get_savable_visit_types,
)
from swh.web.save_code_now.models import (
    SAVE_REQUEST_ACCEPTED,
    SAVE_REQUEST_PENDING,
    SAVE_REQUEST_REJECTED,
    SAVE_TASK_FAILED,
    SAVE_TASK_NOT_CREATED,
    SAVE_TASK_NOT_YET_SCHEDULED,
    SAVE_TASK_RUNNING,
    SAVE_TASK_SCHEDULED,
    SAVE_TASK_SUCCEEDED,
    SaveOriginRequest,
)
from swh.web.save_origin_webhooks.generic_receiver import SUPPORTED_FORGE_TYPES
from swh.web.tests.django_asserts import assert_contains
from swh.web.tests.helpers import check_http_get_response
from swh.web.utils import reverse


@pytest.mark.django_db
def test_origin_save_metrics(client, swh_scheduler):
    """Check the save request metrics exposed by the Prometheus endpoint.

    Random save requests are inserted in database, half of them flagged as
    coming from a webhook, then each expected metric line is looked up in
    the response of the metrics endpoint.
    """
    visit_types = get_savable_visit_types()
    request_statuses = (
        SAVE_REQUEST_ACCEPTED,
        SAVE_REQUEST_REJECTED,
        SAVE_REQUEST_PENDING,
    )
    load_task_statuses = (
        SAVE_TASK_NOT_CREATED,
        SAVE_TASK_NOT_YET_SCHEDULED,
        SAVE_TASK_SCHEDULED,
        SAVE_TASK_SUCCEEDED,
        SAVE_TASK_FAILED,
        SAVE_TASK_RUNNING,
    )
    finished_task_statuses = (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED)
    forge_types = list(SUPPORTED_FORGE_TYPES)

    nb_requests = random.randint(50, 100)
    for index in range(nb_requests):
        visit_type = random.choice(visit_types)
        request_status = random.choice(request_statuses)
        load_task_status = random.choice(load_task_statuses)
        # every other request is flagged as submitted through a webhook
        from_webhook = index % 2 == 0
        webhook_origin = random.choice(forge_types) if from_webhook else None

        request = SaveOriginRequest.objects.create(
            origin_url="origin",
            visit_type=visit_type,
            status=request_status,
            loading_task_status=load_task_status,
            from_webhook=from_webhook,
            webhook_origin=webhook_origin,
        )
        if load_task_status in finished_task_statuses:
            delay = random.choice(range(60))
            # Note that this injects dates in the future for the sake of the
            # test only
            request.visit_date = request.request_date + timedelta(seconds=delay)
            request.save()

    resp = check_http_get_response(
        client,
        reverse("metrics-prometheus"),
        status_code=200,
        content_type=CONTENT_TYPE_LATEST,
    )

    def assert_metric_exposed(metric_name, labels, value):
        # labels is an ordered sequence of (label name, label value) pairs
        rendered_labels = ",".join(f'{key}="{val}"' for key, val in labels)
        assert_contains(resp, f"{metric_name}{{{rendered_labels}}} {float(value)}\n")

    # accepted save requests metrics
    accepted_requests = SaveOriginRequest.objects.filter(status=SAVE_REQUEST_ACCEPTED)
    for visit_type, load_task_status in product(visit_types, load_task_statuses):
        expected_count = accepted_requests.filter(
            visit_type=visit_type, loading_task_status=load_task_status
        ).count()
        assert_metric_exposed(
            ACCEPTED_SAVE_REQUESTS_METRIC,
            (("load_task_status", load_task_status), ("visit_type", visit_type)),
            expected_count,
        )

    # submitted save requests metrics
    for visit_type, request_status in product(visit_types, request_statuses):
        expected_count = SaveOriginRequest.objects.filter(
            visit_type=visit_type, status=request_status
        ).count()
        assert_metric_exposed(
            SUBMITTED_SAVE_REQUESTS_METRIC,
            (("status", request_status), ("visit_type", visit_type)),
            expected_count,
        )

    # save requests submitted through webhooks metrics
    for request_status, forge_type in product(request_statuses, SUPPORTED_FORGE_TYPES):
        expected_count = SaveOriginRequest.objects.filter(
            status=request_status, webhook_origin=forge_type
        ).count()
        assert_metric_exposed(
            SUBMITTED_SAVE_REQUESTS_FROM_WEBHOOKS_METRIC,
            (("status", request_status), ("webhook_origin", forge_type)),
            expected_count,
        )

    # delay metrics
    for visit_type, load_task_status in product(visit_types, finished_task_statuses):
        visited_requests = SaveOriginRequest.objects.filter(
            visit_type=visit_type,
            loading_task_status=load_task_status,
            visit_date__isnull=False,
        )
        expected_delay = sum(
            visited.visit_date.timestamp() - visited.request_date.timestamp()
            for visited in visited_requests
        )
        assert_metric_exposed(
            ACCEPTED_SAVE_REQUESTS_DELAY_METRIC,
            (("load_task_status", load_task_status), ("visit_type", visit_type)),
            expected_delay,
        )