Changeset View
Standalone View
swh/web/common/origin_save.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from bisect import bisect_right | from bisect import bisect_right | ||||
from datetime import datetime, timezone, timedelta | from datetime import datetime, timezone, timedelta | ||||
from itertools import product | |||||
import json | import json | ||||
import logging | import logging | ||||
from django.core.exceptions import ObjectDoesNotExist | from django.core.exceptions import ObjectDoesNotExist | ||||
from django.core.exceptions import ValidationError | from django.core.exceptions import ValidationError | ||||
from django.core.validators import URLValidator | from django.core.validators import URLValidator | ||||
from django.utils.html import escape | from django.utils.html import escape | ||||
from prometheus_client import Gauge | |||||
import requests | import requests | ||||
import sentry_sdk | import sentry_sdk | ||||
from swh.web import config | from swh.web import config | ||||
from swh.web.common import service | from swh.web.common import service | ||||
from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc | from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc | ||||
from swh.web.common.models import ( | from swh.web.common.models import ( | ||||
SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, | SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, | ||||
SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, | SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, | ||||
SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, | SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, | ||||
SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING | SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, | ||||
SAVE_TASK_NOT_CREATED | |||||
) | ) | ||||
from swh.web.common.origin_visits import get_origin_visits | from swh.web.common.origin_visits import get_origin_visits | ||||
from swh.web.common.utils import parse_timestamp | from swh.web.common.utils import parse_timestamp, SWH_WEB_METRICS_REGISTRY | ||||
from swh.scheduler.utils import create_oneshot_task_dict | from swh.scheduler.utils import create_oneshot_task_dict | ||||
scheduler = config.scheduler() | scheduler = config.scheduler() | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 484 Lines • ▼ Show 20 Lines | try: | ||||
task_run['worker'] = task_run_info['hostname'] | task_run['worker'] = task_run_info['hostname'] | ||||
elif 'host' in task_run_info: | elif 'host' in task_run_info: | ||||
task_run['worker'] = task_run_info['host'] | task_run['worker'] = task_run_info['host'] | ||||
except Exception as exc: | except Exception as exc: | ||||
logger.warning('Request to Elasticsearch failed\n%s', exc) | logger.warning('Request to Elasticsearch failed\n%s', exc) | ||||
sentry_sdk.capture_exception(exc) | sentry_sdk.capture_exception(exc) | ||||
return task_run | return task_run | ||||
SUBMITTED_SAVE_REQUESTS_METRIC = 'swh_web_submitted_save_requests' | |||||
_submitted_save_requests_gauge = Gauge( | |||||
name=SUBMITTED_SAVE_REQUESTS_METRIC, | |||||
documentation='Number of submitted origin save requests', | |||||
labelnames=['status', 'visit_type'], | |||||
registry=SWH_WEB_METRICS_REGISTRY) | |||||
ACCEPTED_SAVE_REQUESTS_METRIC = 'swh_web_accepted_save_requests' | |||||
_accepted_save_requests_gauge = Gauge( | |||||
name=ACCEPTED_SAVE_REQUESTS_METRIC, | |||||
documentation='Number of accepted origin save requests', | |||||
vlorentz: In your other diff, you used counters, with a status tag. Why did you change that? | |||||
Done Inline ActionsTo avoid having too much labels combination and useless metrics. For instance a rejected request will never have a loading task created so the load_task_status label value will always be "not created" here. So I think it is better to have a separate metric for each save request status. anlambert: To avoid having too much labels combination and useless metrics. For instance a rejected… | |||||
Not Done Inline ActionsWill Prometheus create a time-series for these extra combinations if we never report a value for them, though? The problem with using different metric names is that, afaik, they can't be aggregated. And we probably want to aggregate accepted and rejected requests, to get the total number of handled requests. (and maybe pending requests too, to have the total number of requests received) vlorentz: Will Prometheus create a time-series for these extra combinations if we never report a value… | |||||
Done Inline ActionsHow about dropping the REJECTED_SAVE_REQUESTS_METRIC and PENDING_SAVE_REQUESTS_METRIC and merge them in the following gauge ? Gauge( name=SAVE_REQUESTS_METRIC, documentation='Number of origin save requests', labelnames=['status', 'visit_type'], registry=SWH_WEB_METRICS_REGISTRY) I think keeping the metric about accepted save requests is interesting as for instance it could be used to calculate the ratio of successful requests. anlambert: How about dropping the REJECTED_SAVE_REQUESTS_METRIC and PENDING_SAVE_REQUESTS_METRIC and merge… | |||||
Not Done Inline ActionsCan you calculate such a ratio in prometheus between different metrics? vlorentz: Can you calculate such a ratio in prometheus between different metrics? | |||||
Done Inline ActionsSure, you can write this query in PromQL: sum(swh_web_accepted_save_requests{load_task_status="succeed"}) / sum(swh_web_submitted_save_requests) anlambert: Sure, you can write this query in PromQL:
```
sum… | |||||
Not Done Inline Actionsok vlorentz: ok | |||||
labelnames=['load_task_status', 'visit_type'], | |||||
registry=SWH_WEB_METRICS_REGISTRY) | |||||
def compute_save_requests_metrics(): | |||||
"""Compute a couple of Prometheus metrics related to | |||||
origin save requests""" | |||||
request_statuses = (SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, | |||||
SAVE_REQUEST_PENDING) | |||||
load_task_statuses = (SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, | |||||
SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEED, | |||||
SAVE_TASK_FAILED, SAVE_TASK_RUNNING) | |||||
visit_types = get_savable_visit_types() | |||||
labels_set = product(request_statuses, visit_types) | |||||
for labels in labels_set: | |||||
_submitted_save_requests_gauge.labels(*labels).set(0) | |||||
labels_set = product(load_task_statuses, visit_types) | |||||
for labels in labels_set: | |||||
_accepted_save_requests_gauge.labels(*labels).set(0) | |||||
for sor in SaveOriginRequest.objects.all(): | |||||
if sor.status == SAVE_REQUEST_ACCEPTED: | |||||
_accepted_save_requests_gauge.labels( | |||||
load_task_status=sor.loading_task_status, | |||||
visit_type=sor.visit_type).inc() | |||||
_submitted_save_requests_gauge.labels( | |||||
status=sor.status, visit_type=sor.visit_type).inc() |
In your other diff, you used counters, with a status tag. Why did you change that?