Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show All 10 Lines | |||||
import requests | import requests | ||||
from django.core.exceptions import ObjectDoesNotExist | from django.core.exceptions import ObjectDoesNotExist | ||||
from django.core.exceptions import ValidationError | from django.core.exceptions import ValidationError | ||||
from django.core.validators import URLValidator | from django.core.validators import URLValidator | ||||
from django.utils.html import escape | from django.utils.html import escape | ||||
from swh.core.statsd import statsd | |||||
from swh.web import config | from swh.web import config | ||||
from swh.web.common import service | from swh.web.common import service | ||||
from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc | from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc | ||||
from swh.web.common.models import ( | from swh.web.common.models import ( | ||||
SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, | SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, | ||||
SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, | SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, | ||||
SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, | SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, | ||||
SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING | SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING | ||||
) | ) | ||||
from swh.web.common.origin_visits import get_origin_visits | from swh.web.common.origin_visits import get_origin_visits | ||||
from swh.web.common.utils import parse_timestamp | from swh.web.common.utils import parse_timestamp | ||||
from swh.scheduler.utils import create_oneshot_task_dict | from swh.scheduler.utils import create_oneshot_task_dict | ||||
scheduler = config.scheduler() | scheduler = config.scheduler() | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
SAVE_REQUESTS_COUNT_METRIC = 'swh_web_save_requests' | |||||
PENDING_SAVE_REQUESTS_COUNT_METRIC = 'swh_web_pending_save_requests' | |||||
def get_origin_save_authorized_urls(): | def get_origin_save_authorized_urls(): | ||||
""" | """ | ||||
Get the list of origin url prefixes authorized to be | Get the list of origin url prefixes authorized to be | ||||
immediately loaded into the archive (whitelist). | immediately loaded into the archive (whitelist). | ||||
Returns: | Returns: | ||||
list: The list of authorized origin url prefix | list: The list of authorized origin url prefix | ||||
▲ Show 20 Lines • Show All 232 Lines • ▼ Show 20 Lines | if save_request_status == SAVE_REQUEST_ACCEPTED: | ||||
kwargs['repo_url'] = origin_url | kwargs['repo_url'] = origin_url | ||||
elif visit_type == 'hg': | elif visit_type == 'hg': | ||||
kwargs['origin_url'] = origin_url | kwargs['origin_url'] = origin_url | ||||
elif visit_type == 'svn': | elif visit_type == 'svn': | ||||
kwargs['origin_url'] = origin_url | kwargs['origin_url'] = origin_url | ||||
kwargs['svn_url'] = origin_url | kwargs['svn_url'] = origin_url | ||||
sor = None | sor = None | ||||
# get list of previously sumitted save requests | # get list of previously submitted save requests | ||||
current_sors = \ | current_sors = \ | ||||
list(SaveOriginRequest.objects.filter(visit_type=visit_type, | list(SaveOriginRequest.objects.filter(visit_type=visit_type, | ||||
origin_url=origin_url)) | origin_url=origin_url)) | ||||
can_create_task = False | can_create_task = False | ||||
# if no save requests previously submitted, create the scheduler task | # if no save requests previously submitted, create the scheduler task | ||||
if not current_sors: | if not current_sors: | ||||
can_create_task = True | can_create_task = True | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | elif save_request_status == SAVE_REQUEST_PENDING: | ||||
status=save_request_status) | status=save_request_status) | ||||
# origin can not be saved as its url is blacklisted, | # origin can not be saved as its url is blacklisted, | ||||
# log the request to the database anyway | # log the request to the database anyway | ||||
else: | else: | ||||
sor = SaveOriginRequest.objects.create(visit_type=visit_type, | sor = SaveOriginRequest.objects.create(visit_type=visit_type, | ||||
origin_url=origin_url, | origin_url=origin_url, | ||||
status=save_request_status) | status=save_request_status) | ||||
statsd.increment(SAVE_REQUESTS_COUNT_METRIC, | |||||
tags={'status': save_request_status, | |||||
'visit_type': visit_type}) | |||||
vlorentz: it should be an `else` statement; we don't want a counter for pending tasks. | |||||
if save_request_status == SAVE_REQUEST_PENDING: | |||||
update_pending_save_requests_stats() | |||||
if save_request_status == SAVE_REQUEST_REJECTED: | if save_request_status == SAVE_REQUEST_REJECTED: | ||||
raise ForbiddenExc(('The "save code now" request has been rejected ' | raise ForbiddenExc(('The "save code now" request has been rejected ' | ||||
'because the provided origin url is blacklisted.')) | 'because the provided origin url is blacklisted.')) | ||||
return _save_request_dict(sor, task) | return _save_request_dict(sor, task) | ||||
def get_save_origin_requests_from_queryset(requests_queryset): | def get_save_origin_requests_from_queryset(requests_queryset): | ||||
▲ Show 20 Lines • Show All 168 Lines • ▼ Show 20 Lines | try: | ||||
task_run['worker'] = task_run_info['hostname'] | task_run['worker'] = task_run_info['hostname'] | ||||
elif 'host' in task_run_info: | elif 'host' in task_run_info: | ||||
task_run['worker'] = task_run_info['host'] | task_run['worker'] = task_run_info['host'] | ||||
except Exception as e: | except Exception as e: | ||||
logger.warning('Request to Elasticsearch failed\n%s' % str(e)) | logger.warning('Request to Elasticsearch failed\n%s' % str(e)) | ||||
pass | pass | ||||
return task_run | return task_run | ||||
def update_pending_save_requests_stats(): | |||||
pending_save_requests = SaveOriginRequest.objects.filter( | |||||
status=SAVE_REQUEST_PENDING) | |||||
statsd.gauge(PENDING_SAVE_REQUESTS_COUNT_METRIC, | |||||
pending_save_requests.count()) |
it should be an else statement; we don't want a counter for pending tasks.