Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show First 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | def can_save_origin(origin_url): | ||||
for url_prefix in get_origin_save_authorized_urls(): | for url_prefix in get_origin_save_authorized_urls(): | ||||
if origin_url.startswith(url_prefix): | if origin_url.startswith(url_prefix): | ||||
return SAVE_REQUEST_ACCEPTED | return SAVE_REQUEST_ACCEPTED | ||||
# otherwise, the origin url needs to be manually verified | # otherwise, the origin url needs to be manually verified | ||||
return SAVE_REQUEST_PENDING | return SAVE_REQUEST_PENDING | ||||
# map origin type to scheduler task | # map visit type to scheduler task | ||||
# TODO: do not hardcode the task name here (T1157) | # TODO: do not hardcode the task name here (T1157) | ||||
_origin_type_task = { | _visit_type_task = { | ||||
'git': 'load-git', | 'git': 'load-git', | ||||
'hg': 'load-hg', | 'hg': 'load-hg', | ||||
'svn': 'load-svn' | 'svn': 'load-svn' | ||||
} | } | ||||
# map scheduler task status to origin save status | # map scheduler task status to origin save status | ||||
_save_task_status = { | _save_task_status = { | ||||
'next_run_not_scheduled': SAVE_TASK_NOT_YET_SCHEDULED, | 'next_run_not_scheduled': SAVE_TASK_NOT_YET_SCHEDULED, | ||||
'next_run_scheduled': SAVE_TASK_SCHEDULED, | 'next_run_scheduled': SAVE_TASK_SCHEDULED, | ||||
'completed': SAVE_TASK_SUCCEED, | 'completed': SAVE_TASK_SUCCEED, | ||||
'disabled': SAVE_TASK_FAILED | 'disabled': SAVE_TASK_FAILED | ||||
} | } | ||||
def get_savable_origin_types(): | def get_savable_visit_types(): | ||||
return sorted(list(_origin_type_task.keys())) | return sorted(list(_visit_type_task.keys())) | ||||
def _check_origin_type_savable(origin_type): | def _check_visit_type_savable(visit_type): | ||||
""" | """ | ||||
Check that the given origin type can be loaded | Check that the given visit type can be performed | ||||
through a save request. | through a save request. | ||||
Raises: | Raises: | ||||
BadInputExc: the origin type is not saveable | BadInputExc: the visit type is not saveable | ||||
""" | """ | ||||
allowed_origin_types = ', '.join(get_savable_origin_types()) | allowed_visit_types = ', '.join(get_savable_visit_types()) | ||||
if origin_type not in _origin_type_task: | if visit_type not in _visit_type_task: | ||||
raise BadInputExc('Origin of type %s can not be saved! ' | raise BadInputExc('Visit of type %s can not be saved! ' | ||||
'Allowed types are the following: %s' % | 'Allowed types are the following: %s' % | ||||
(origin_type, allowed_origin_types)) | (visit_type, allowed_visit_types)) | ||||
_validate_url = URLValidator(schemes=['http', 'https', 'svn', 'git']) | _validate_url = URLValidator(schemes=['http', 'https', 'svn', 'git']) | ||||
def _check_origin_url_valid(origin_url): | def _check_origin_url_valid(origin_url): | ||||
try: | try: | ||||
_validate_url(origin_url) | _validate_url(origin_url) | ||||
except ValidationError: | except ValidationError: | ||||
raise BadInputExc('The provided origin url (%s) is not valid!' % | raise BadInputExc('The provided origin url (%s) is not valid!' % | ||||
escape(origin_url)) | escape(origin_url)) | ||||
def _get_visit_info_for_save_request(save_request): | def _get_visit_info_for_save_request(save_request): | ||||
visit_date = None | visit_date = None | ||||
visit_status = None | visit_status = None | ||||
try: | try: | ||||
origin = {'type': save_request.origin_type, | origin = {'url': save_request.origin_url} | ||||
'url': save_request.origin_url} | |||||
origin_info = service.lookup_origin(origin) | origin_info = service.lookup_origin(origin) | ||||
origin_visits = get_origin_visits(origin_info) | origin_visits = get_origin_visits(origin_info) | ||||
visit_dates = [parse_timestamp(v['date']) | visit_dates = [parse_timestamp(v['date']) | ||||
for v in origin_visits] | for v in origin_visits] | ||||
i = bisect_right(visit_dates, save_request.request_date) | i = bisect_right(visit_dates, save_request.request_date) | ||||
if i != len(visit_dates): | if i != len(visit_dates): | ||||
visit_date = visit_dates[i] | visit_date = visit_dates[i] | ||||
visit_status = origin_visits[i]['status'] | visit_status = origin_visits[i]['status'] | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | def _save_request_dict(save_request, task=None): | ||||
if save_request.loading_task_status != save_task_status: | if save_request.loading_task_status != save_task_status: | ||||
save_request.loading_task_status = save_task_status | save_request.loading_task_status = save_task_status | ||||
must_save = True | must_save = True | ||||
if must_save: | if must_save: | ||||
save_request.save() | save_request.save() | ||||
return {'id': save_request.id, | return {'id': save_request.id, | ||||
'origin_type': save_request.origin_type, | 'visit_type': save_request.visit_type, | ||||
'origin_url': save_request.origin_url, | 'origin_url': save_request.origin_url, | ||||
'save_request_date': save_request.request_date.isoformat(), | 'save_request_date': save_request.request_date.isoformat(), | ||||
'save_request_status': save_request.status, | 'save_request_status': save_request.status, | ||||
'save_task_status': save_task_status, | 'save_task_status': save_task_status, | ||||
'visit_date': visit_date.isoformat() if visit_date else None} | 'visit_date': visit_date.isoformat() if visit_date else None} | ||||
def create_save_origin_request(origin_type, origin_url): | def create_save_origin_request(visit_type, origin_url): | ||||
""" | """ | ||||
Create a loading task to save a software origin into the archive. | Create a loading task to save a software origin into the archive. | ||||
This function aims to create a software origin loading task | This function aims to create a software origin loading task | ||||
through the use of the swh-scheduler component. | through the use of the swh-scheduler component. | ||||
First, some checks are performed to see if the origin type and | First, some checks are performed to see if the visit type and origin | ||||
url are valid but also if the save request can be accepted. | url are valid but also if the save request can be accepted. | ||||
If those checks passed, the loading task is then created. | If those checks passed, the loading task is then created. | ||||
Otherwise, the save request is put in pending or rejected state. | Otherwise, the save request is put in pending or rejected state. | ||||
All the submitted save requests are logged into the swh-web | All the submitted save requests are logged into the swh-web | ||||
database to keep track of them. | database to keep track of them. | ||||
Args: | Args: | ||||
origin_type (str): the type of origin to save (currently only | visit_type (str): the type of visit to perform (currently only | ||||
``git`` but ``svn`` and ``hg`` will soon be available) | ``git`` but ``svn`` and ``hg`` will soon be available) | ||||
origin_url (str): the url of the origin to save | origin_url (str): the url of the origin to save | ||||
Raises: | Raises: | ||||
BadInputExc: the origin type or url is invalid | BadInputExc: the visit type or origin url is invalid | ||||
ForbiddenExc: the provided origin url is blacklisted | ForbiddenExc: the provided origin url is blacklisted | ||||
Returns: | Returns: | ||||
dict: A dict describing the save request with the following keys: | dict: A dict describing the save request with the following keys: | ||||
* **origin_type**: the type of the origin to save | * **visit_type**: the type of visit to perform | ||||
* **origin_url**: the url of the origin | * **origin_url**: the url of the origin | ||||
* **save_request_date**: the date the request was submitted | * **save_request_date**: the date the request was submitted | ||||
* **save_request_status**: the request status, either **accepted**, | * **save_request_status**: the request status, either **accepted**, | ||||
**rejected** or **pending** | **rejected** or **pending** | ||||
* **save_task_status**: the origin loading task status, either | * **save_task_status**: the origin loading task status, either | ||||
**not created**, **not yet scheduled**, **scheduled**, | **not created**, **not yet scheduled**, **scheduled**, | ||||
**succeed** or **failed** | **succeed** or **failed** | ||||
""" | """ | ||||
_check_origin_type_savable(origin_type) | _check_visit_type_savable(visit_type) | ||||
_check_origin_url_valid(origin_url) | _check_origin_url_valid(origin_url) | ||||
save_request_status = can_save_origin(origin_url) | save_request_status = can_save_origin(origin_url) | ||||
task = None | task = None | ||||
# if the origin save request is accepted, create a scheduler | # if the origin save request is accepted, create a scheduler | ||||
# task to load it into the archive | # task to load it into the archive | ||||
if save_request_status == SAVE_REQUEST_ACCEPTED: | if save_request_status == SAVE_REQUEST_ACCEPTED: | ||||
# create a task with high priority | # create a task with high priority | ||||
kwargs = {'priority': 'high'} | kwargs = {'priority': 'high'} | ||||
# set task parameters according to the origin type | # set task parameters according to the visit type | ||||
if origin_type == 'git': | if visit_type == 'git': | ||||
kwargs['repo_url'] = origin_url | kwargs['repo_url'] = origin_url | ||||
elif origin_type == 'hg': | elif visit_type == 'hg': | ||||
kwargs['origin_url'] = origin_url | kwargs['origin_url'] = origin_url | ||||
elif origin_type == 'svn': | elif visit_type == 'svn': | ||||
kwargs['origin_url'] = origin_url | kwargs['origin_url'] = origin_url | ||||
kwargs['svn_url'] = origin_url | kwargs['svn_url'] = origin_url | ||||
sor = None | sor = None | ||||
# get list of previously submitted save requests | # get list of previously submitted save requests | ||||
current_sors = \ | current_sors = \ | ||||
list(SaveOriginRequest.objects.filter(origin_type=origin_type, | list(SaveOriginRequest.objects.filter(visit_type=visit_type, | ||||
origin_url=origin_url)) | origin_url=origin_url)) | ||||
can_create_task = False | can_create_task = False | ||||
# if no save requests previously submitted, create the scheduler task | # if no save requests previously submitted, create the scheduler task | ||||
if not current_sors: | if not current_sors: | ||||
can_create_task = True | can_create_task = True | ||||
else: | else: | ||||
# get the latest submitted save request | # get the latest submitted save request | ||||
Show All 15 Lines | if save_request_status == SAVE_REQUEST_ACCEPTED: | ||||
can_create_task = True | can_create_task = True | ||||
sor = None | sor = None | ||||
else: | else: | ||||
can_create_task = False | can_create_task = False | ||||
if can_create_task: | if can_create_task: | ||||
# effectively create the scheduler task | # effectively create the scheduler task | ||||
task_dict = create_oneshot_task_dict( | task_dict = create_oneshot_task_dict( | ||||
_origin_type_task[origin_type], **kwargs) | _visit_type_task[visit_type], **kwargs) | ||||
task = scheduler.create_tasks([task_dict])[0] | task = scheduler.create_tasks([task_dict])[0] | ||||
# pending save request has been accepted | # pending save request has been accepted | ||||
if sor: | if sor: | ||||
sor.status = SAVE_REQUEST_ACCEPTED | sor.status = SAVE_REQUEST_ACCEPTED | ||||
sor.loading_task_id = task['id'] | sor.loading_task_id = task['id'] | ||||
sor.save() | sor.save() | ||||
else: | else: | ||||
sor = SaveOriginRequest.objects.create(origin_type=origin_type, | sor = SaveOriginRequest.objects.create(visit_type=visit_type, | ||||
origin_url=origin_url, | origin_url=origin_url, | ||||
status=save_request_status, # noqa | status=save_request_status, # noqa | ||||
loading_task_id=task['id']) # noqa | loading_task_id=task['id']) # noqa | ||||
# save request must be manually reviewed for acceptance | # save request must be manually reviewed for acceptance | ||||
elif save_request_status == SAVE_REQUEST_PENDING: | elif save_request_status == SAVE_REQUEST_PENDING: | ||||
# check if such a save request has already been submitted, | # check if such a save request has already been submitted, | ||||
# no need to add it to the database in that case | # no need to add it to the database in that case | ||||
try: | try: | ||||
sor = SaveOriginRequest.objects.get(origin_type=origin_type, | sor = SaveOriginRequest.objects.get(visit_type=visit_type, | ||||
origin_url=origin_url, | origin_url=origin_url, | ||||
status=save_request_status) | status=save_request_status) | ||||
# if not add it to the database | # if not add it to the database | ||||
except ObjectDoesNotExist: | except ObjectDoesNotExist: | ||||
sor = SaveOriginRequest.objects.create(origin_type=origin_type, | sor = SaveOriginRequest.objects.create(visit_type=visit_type, | ||||
origin_url=origin_url, | origin_url=origin_url, | ||||
status=save_request_status) | status=save_request_status) | ||||
# origin can not be saved as its url is blacklisted, | # origin can not be saved as its url is blacklisted, | ||||
# log the request to the database anyway | # log the request to the database anyway | ||||
else: | else: | ||||
sor = SaveOriginRequest.objects.create(origin_type=origin_type, | sor = SaveOriginRequest.objects.create(visit_type=visit_type, | ||||
origin_url=origin_url, | origin_url=origin_url, | ||||
status=save_request_status) | status=save_request_status) | ||||
if save_request_status == SAVE_REQUEST_REJECTED: | if save_request_status == SAVE_REQUEST_REJECTED: | ||||
raise ForbiddenExc('The origin url is blacklisted and will not be ' | raise ForbiddenExc('The origin url is blacklisted and will not be ' | ||||
'loaded into the archive.') | 'loaded into the archive.') | ||||
return _save_request_dict(sor, task) | return _save_request_dict(sor, task) | ||||
Show All 19 Lines | if task_ids: | ||||
tasks = scheduler.get_tasks(task_ids) | tasks = scheduler.get_tasks(task_ids) | ||||
tasks = {task['id']: task for task in tasks} | tasks = {task['id']: task for task in tasks} | ||||
for sor in requests_queryset: | for sor in requests_queryset: | ||||
sr_dict = _save_request_dict(sor, tasks.get(sor.loading_task_id)) | sr_dict = _save_request_dict(sor, tasks.get(sor.loading_task_id)) | ||||
save_requests.append(sr_dict) | save_requests.append(sr_dict) | ||||
return save_requests | return save_requests | ||||
def get_save_origin_requests(origin_type, origin_url): | def get_save_origin_requests(visit_type, origin_url): | ||||
""" | """ | ||||
Get all save requests for a given software origin. | Get all save requests for a given software origin. | ||||
Args: | Args: | ||||
origin_type (str): the type of the origin | visit_type (str): the type of visit | ||||
origin_url (str): the url of the origin | origin_url (str): the url of the origin | ||||
Raises: | Raises: | ||||
BadInputExc: the origin type or url is invalid | BadInputExc: the visit type or origin url is invalid | ||||
NotFoundExc: no save requests can be found for the given origin | NotFoundExc: no save requests can be found for the given origin | ||||
Returns: | Returns: | ||||
list: A list of save origin requests dict as described in | list: A list of save origin requests dict as described in | ||||
:func:`swh.web.common.origin_save.create_save_origin_request` | :func:`swh.web.common.origin_save.create_save_origin_request` | ||||
""" | """ | ||||
_check_origin_type_savable(origin_type) | _check_visit_type_savable(visit_type) | ||||
_check_origin_url_valid(origin_url) | _check_origin_url_valid(origin_url) | ||||
sors = SaveOriginRequest.objects.filter(origin_type=origin_type, | sors = SaveOriginRequest.objects.filter(visit_type=visit_type, | ||||
origin_url=origin_url) | origin_url=origin_url) | ||||
if sors.count() == 0: | if sors.count() == 0: | ||||
raise NotFoundExc(('No save requests found for origin with type ' | raise NotFoundExc(('No save requests found for visit of type ' | ||||
'%s and url %s.') % (origin_type, origin_url)) | '%s on origin with url %s.') | ||||
% (visit_type, origin_url)) | |||||
return get_save_origin_requests_from_queryset(sors) | return get_save_origin_requests_from_queryset(sors) | ||||
def get_save_origin_task_info(save_request_id): | def get_save_origin_task_info(save_request_id): | ||||
""" | """ | ||||
Get detailed information about an accepted save origin request | Get detailed information about an accepted save origin request | ||||
and its associated loading task. | and its associated loading task. | ||||
▲ Show 20 Lines • Show All 117 Lines • Show Last 20 Lines |