Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show First 20 Lines • Show All 153 Lines • ▼ Show 20 Lines | except ValidationError: | ||||
raise BadInputExc( | raise BadInputExc( | ||||
"The provided origin url (%s) is not valid!" % escape(origin_url) | "The provided origin url (%s) is not valid!" % escape(origin_url) | ||||
) | ) | ||||
def _get_visit_info_for_save_request( | def _get_visit_info_for_save_request( | ||||
save_request: SaveOriginRequest, | save_request: SaveOriginRequest, | ||||
) -> Tuple[Optional[datetime], Optional[str]]: | ) -> Tuple[Optional[datetime], Optional[str]]: | ||||
"""Retrieve visit information out of a save request | |||||
Args: | |||||
save_request: Input save origin request to retrieve information for. | |||||
Returns: | |||||
Tuple of (visit date, optional visit status) for such save request origin | |||||
""" | |||||
visit_date = None | visit_date = None | ||||
visit_status = None | visit_status = None | ||||
time_now = datetime.now(tz=timezone.utc) | time_now = datetime.now(tz=timezone.utc) | ||||
time_delta = time_now - save_request.request_date | time_delta = time_now - save_request.request_date | ||||
# stop trying to find a visit date one month after save request submission | # stop trying to find a visit date one month after save request submission | ||||
# as those requests to storage are expensive and associated loading task | # as those requests to storage are expensive and associated loading task | ||||
# surely ended up with errors | # surely ended up with errors | ||||
if time_delta.days <= 30: | if time_delta.days <= 30: | ||||
Show All 10 Lines | if time_delta.days <= 30: | ||||
except Exception as exc: | except Exception as exc: | ||||
sentry_sdk.capture_exception(exc) | sentry_sdk.capture_exception(exc) | ||||
return visit_date, visit_status | return visit_date, visit_status | ||||
def _check_visit_update_status(
    save_request: SaveOriginRequest, save_task_status: str
) -> Tuple[Optional[datetime], str]:
    """Resolve the status of a save request from the visit found for its origin.

    The visit date and visit status are looked up through
    ``_get_visit_info_for_save_request``. The visit date is written to
    ``save_request.visit_date`` as a side effect; this function does not call
    ``save()`` on the request itself.

    Args:
        save_request: Save origin request whose outcome must be determined.
        save_task_status: Status currently known for the associated save task;
            returned unchanged when no rule below applies.

    Returns:
        Tuple of (optional visit date, resolved save task status).
    """
    visit_date, visit_status = _get_visit_info_for_save_request(save_request)
    save_request.visit_date = visit_date

    # A dated visit with any known status counts as a completed save.
    # NOTE(review): because this check comes first, a dated visit whose status
    # is "ongoing" or "failed" also resolves to SUCCEEDED -- confirm intended.
    if visit_date and visit_status is not None:
        return visit_date, SAVE_TASK_SUCCEEDED

    # The visit exists but has not finished yet.
    if visit_status in ("created", "ongoing"):
        return visit_date, SAVE_TASK_RUNNING

    if visit_status in ("not_found", "failed"):
        return visit_date, SAVE_TASK_FAILED

    # No usable visit status: give up on requests submitted more than 30 days
    # ago, otherwise keep the status handed in by the caller.
    elapsed = datetime.now(tz=timezone.utc) - save_request.request_date
    if elapsed.days > 30:
        return visit_date, SAVE_TASK_FAILED
    return visit_date, save_task_status
def _save_request_dict( | def _save_request_dict( | ||||
save_request: SaveOriginRequest, | save_request: SaveOriginRequest, | ||||
task: Optional[Dict[str, Any]] = None, | task: Optional[Dict[str, Any]] = None, | ||||
task_run: Optional[Dict[str, Any]] = None, | task_run: Optional[Dict[str, Any]] = None, | ||||
) -> Dict[str, Any]: | ) -> Dict[str, Any]: | ||||
"""Update save request information out of task and task_run information. | |||||
Args: | |||||
save_request: Save request | |||||
task: Associated scheduler task information about the save request | |||||
task_run: Most recent run occurrence of the associated task | |||||
Returns: | |||||
Summary of the save request information updated. | |||||
""" | |||||
must_save = False | must_save = False | ||||
visit_date = save_request.visit_date | visit_date = save_request.visit_date | ||||
# save task still in scheduler db | # save task still in scheduler db | ||||
if task: | if task: | ||||
save_task_status = _save_task_status[task["status"]] | save_task_status = _save_task_status[task["status"]] | ||||
if task_run: | if task_run: | ||||
save_task_status = _save_task_run_status[task_run["status"]] | save_task_status = _save_task_run_status[task_run["status"]] | ||||
# Consider request from which a visit date has already been found | # Consider request from which a visit date has already been found | ||||
# as succeeded to avoid retrieving it again | # as succeeded to avoid retrieving it again | ||||
if save_task_status == SAVE_TASK_SCHEDULED and visit_date: | if save_task_status == SAVE_TASK_SCHEDULED and visit_date: | ||||
save_task_status = SAVE_TASK_SUCCEEDED | save_task_status = SAVE_TASK_SUCCEEDED | ||||
if ( | if ( | ||||
save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) | save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) | ||||
and not visit_date | and not visit_date | ||||
▲ Show 20 Lines • Show All 474 Lines • Show Last 20 Lines |