Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show First 20 Lines • Show All 166 Lines • ▼ Show 20 Lines | if time_delta.days <= 30: | ||||
origin = {"url": save_request.origin_url} | origin = {"url": save_request.origin_url} | ||||
origin_info = archive.lookup_origin(origin) | origin_info = archive.lookup_origin(origin) | ||||
origin_visits = get_origin_visits(origin_info) | origin_visits = get_origin_visits(origin_info) | ||||
visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits] | visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits] | ||||
i = bisect_right(visit_dates, save_request.request_date) | i = bisect_right(visit_dates, save_request.request_date) | ||||
if i != len(visit_dates): | if i != len(visit_dates): | ||||
visit_date = visit_dates[i] | visit_date = visit_dates[i] | ||||
visit_status = origin_visits[i]["status"] | visit_status = origin_visits[i]["status"] | ||||
if origin_visits[i]["status"] not in ("full", "partial"): | if origin_visits[i]["status"] not in ("full", "partial", "not_found"): | ||||
visit_date = None | visit_date = None | ||||
except Exception as exc: | except Exception as exc: | ||||
sentry_sdk.capture_exception(exc) | sentry_sdk.capture_exception(exc) | ||||
return visit_date, visit_status | return visit_date, visit_status | ||||
def _check_visit_update_status(save_request, save_task_status): | def _check_visit_update_status(save_request, save_task_status): | ||||
visit_date, visit_status = _get_visit_info_for_save_request(save_request) | visit_date, visit_status = _get_visit_info_for_save_request(save_request) | ||||
save_request.visit_date = visit_date | save_request.visit_date = visit_date | ||||
# visit has been performed, mark the saving task as succeeded | # visit has been performed, mark the saving task as succeeded | ||||
if visit_date and visit_status is not None: | if visit_date and visit_status is not None: | ||||
save_task_status = SAVE_TASK_SUCCEEDED | save_task_status = SAVE_TASK_SUCCEEDED | ||||
elif visit_status in ("created", "ongoing"): | elif visit_status in ("created", "ongoing"): | ||||
save_task_status = SAVE_TASK_RUNNING | save_task_status = SAVE_TASK_RUNNING | ||||
elif visit_status in ("not_found", "failed"): | |||||
save_task_status = SAVE_TASK_FAILED | |||||
else: | else: | ||||
time_now = datetime.now(tz=timezone.utc) | time_now = datetime.now(tz=timezone.utc) | ||||
time_delta = time_now - save_request.request_date | time_delta = time_now - save_request.request_date | ||||
# consider the task as failed if it is still in scheduled state | # consider the task as failed if it is still in scheduled state | ||||
# 30 days after its submission | # 30 days after its submission | ||||
if time_delta.days > 30: | if time_delta.days > 30: | ||||
save_task_status = SAVE_TASK_FAILED | save_task_status = SAVE_TASK_FAILED | ||||
return visit_date, save_task_status | return visit_date, save_task_status | ||||
Show All 10 Lines | if task: | ||||
# Consider request from which a visit date has already been found | # Consider request from which a visit date has already been found | ||||
# as succeeded to avoid retrieving it again | # as succeeded to avoid retrieving it again | ||||
if save_task_status == SAVE_TASK_SCHEDULED and visit_date: | if save_task_status == SAVE_TASK_SCHEDULED and visit_date: | ||||
save_task_status = SAVE_TASK_SUCCEEDED | save_task_status = SAVE_TASK_SUCCEEDED | ||||
if ( | if ( | ||||
save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) | save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) | ||||
and not visit_date | and not visit_date | ||||
): | ): | ||||
visit_date, _ = _get_visit_info_for_save_request(save_request) | visit_date, visit_status = _get_visit_info_for_save_request(save_request) | ||||
save_request.visit_date = visit_date | save_request.visit_date = visit_date | ||||
if visit_status in ("failed", "not_found"): | |||||
save_task_status = SAVE_TASK_FAILED | |||||
must_save = True | must_save = True | ||||
# Check tasks still marked as scheduled / not yet scheduled | # Check tasks still marked as scheduled / not yet scheduled | ||||
if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): | if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): | ||||
visit_date, save_task_status = _check_visit_update_status( | visit_date, save_task_status = _check_visit_update_status( | ||||
save_request, save_task_status | save_request, save_task_status | ||||
) | ) | ||||
# save task may have been archived | # save task may have been archived | ||||
▲ Show 20 Lines • Show All 418 Lines • Show Last 20 Lines |