Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show First 20 Lines • Show All 178 Lines • ▼ Show 20 Lines | if time_delta.days <= 30: | ||||
try: | try: | ||||
origin_info = archive.lookup_origin(OriginInfo(url=save_request.origin_url)) | origin_info = archive.lookup_origin(OriginInfo(url=save_request.origin_url)) | ||||
origin_visits = get_origin_visits(origin_info) | origin_visits = get_origin_visits(origin_info) | ||||
visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits] | visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits] | ||||
i = bisect_right(visit_dates, save_request.request_date) | i = bisect_right(visit_dates, save_request.request_date) | ||||
if i != len(visit_dates): | if i != len(visit_dates): | ||||
visit_date = visit_dates[i] | visit_date = visit_dates[i] | ||||
visit_status = origin_visits[i]["status"] | visit_status = origin_visits[i]["status"] | ||||
if origin_visits[i]["status"] not in ("full", "partial", "not_found"): | if visit_status not in ("full", "partial", "not_found"): | ||||
visit_date = None | visit_date = None | ||||
except Exception as exc: | except Exception as exc: | ||||
sentry_sdk.capture_exception(exc) | sentry_sdk.capture_exception(exc) | ||||
return visit_date, visit_status | return visit_date, visit_status | ||||
def _check_visit_update_status( | def _check_visit_update_status( | ||||
save_request: SaveOriginRequest, save_task_status: str | save_request: SaveOriginRequest, save_task_status: str | ||||
) -> Tuple[Optional[datetime], str]: | ) -> Tuple[Optional[datetime], str]: | ||||
"""Given a save request and a save task status, determine whether a save request was | """Given a save request and a save task status, determine whether a save request was | ||||
successful or failed. | successful or failed. | ||||
Args: | Args: | ||||
save_request: Input save origin request to retrieve information for. | save_request: Input save origin request to retrieve information for. | ||||
Returns: | Returns: | ||||
Tuple of (optional visit date, save task status) for such save request origin | Tuple of (optional visit date, save task status) for such save request origin | ||||
""" | """ | ||||
visit_date, visit_status = _get_visit_info_for_save_request(save_request) | visit_date, visit_status = _get_visit_info_for_save_request(save_request) | ||||
save_request.visit_date = visit_date | save_request.visit_date = visit_date | ||||
if visit_date and visit_status is not None: | save_request.visit_status = visit_status | ||||
if visit_date and visit_status in ("full", "partial"): | |||||
# visit has been performed, mark the saving task as succeeded | # visit has been performed, mark the saving task as succeeded | ||||
save_task_status = SAVE_TASK_SUCCEEDED | save_task_status = SAVE_TASK_SUCCEEDED | ||||
elif visit_status in ("created", "ongoing"): | elif visit_status in ("created", "ongoing"): | ||||
# visit is currently running | # visit is currently running | ||||
save_task_status = SAVE_TASK_RUNNING | save_task_status = SAVE_TASK_RUNNING | ||||
elif visit_status in ("not_found", "failed"): | elif visit_status in ("not_found", "failed"): | ||||
save_task_status = SAVE_TASK_FAILED | save_task_status = SAVE_TASK_FAILED | ||||
else: | else: | ||||
time_now = datetime.now(tz=timezone.utc) | time_now = datetime.now(tz=timezone.utc) | ||||
time_delta = time_now - save_request.request_date | time_delta = time_now - save_request.request_date | ||||
# consider the task as failed if it is still in scheduled state | # consider the task as failed if it is still in scheduled state | ||||
# 30 days after its submission | # 30 days after its submission | ||||
ardumont: I think this possibly happens because the tasks are not being updated in the save code now UI [1]. | |||||
Done Inline Actions: IIRC we had some service downtime a couple of years ago due to a hardware issue, and all save request tasks in the queue got lost. anlambert: IIRC we had some service downtime a couple of years ago due to a hardware issue, and all save… | |||||
Done Inline Actions: Yeah, I recall something like that now that you have refreshed my memory a bit on this. Note that I don't think that excludes what I said, nonetheless ;) ardumont: Yeah, I recall something like that now that you have refreshed my memory a bit on this.
Note that I don't… | |||||
if time_delta.days > 30: | if time_delta.days > 30: | ||||
save_task_status = SAVE_TASK_FAILED | save_task_status = SAVE_TASK_FAILED | ||||
return visit_date, save_task_status | return visit_date, save_task_status | ||||
def _save_request_dict( | def _save_request_dict( | ||||
save_request: SaveOriginRequest, | save_request: SaveOriginRequest, | ||||
task: Optional[Dict[str, Any]] = None, | task: Optional[Dict[str, Any]] = None, | ||||
Show All 12 Lines | ) -> Dict[str, Any]: | ||||
""" | """ | ||||
must_save = False | must_save = False | ||||
visit_date = save_request.visit_date | visit_date = save_request.visit_date | ||||
# save task still in scheduler db | # save task still in scheduler db | ||||
if task: | if task: | ||||
save_task_status = _save_task_status[task["status"]] | save_task_status = _save_task_status[task["status"]] | ||||
if task_run: | if task_run: | ||||
save_task_status = _save_task_run_status[task_run["status"]] | save_task_status = _save_task_run_status[task_run["status"]] | ||||
# Consider request from which a visit date has already been found | # Consider request from which a visit date has already been found | ||||
# as succeeded to avoid retrieving it again | # as succeeded to avoid retrieving it again | ||||
if save_task_status == SAVE_TASK_SCHEDULED and visit_date: | if save_task_status == SAVE_TASK_SCHEDULED and visit_date: | ||||
save_task_status = SAVE_TASK_SUCCEEDED | save_task_status = SAVE_TASK_SUCCEEDED | ||||
if ( | if ( | ||||
save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) | save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) | ||||
and not visit_date | and not visit_date | ||||
): | ): | ||||
visit_date, visit_status = _get_visit_info_for_save_request(save_request) | visit_date, visit_status = _get_visit_info_for_save_request(save_request) | ||||
save_request.visit_date = visit_date | save_request.visit_date = visit_date | ||||
save_request.visit_status = visit_status | |||||
if visit_status in ("failed", "not_found"): | if visit_status in ("failed", "not_found"): | ||||
save_task_status = SAVE_TASK_FAILED | save_task_status = SAVE_TASK_FAILED | ||||
must_save = True | must_save = True | ||||
# Check tasks still marked as scheduled / not yet scheduled | # Check tasks still marked as scheduled / not yet scheduled | ||||
if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): | if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): | ||||
visit_date, save_task_status = _check_visit_update_status( | visit_date, save_task_status = _check_visit_update_status( | ||||
save_request, save_task_status | save_request, save_task_status | ||||
) | ) | ||||
Show All 24 Lines | ) -> Dict[str, Any]: | ||||
return { | return { | ||||
"id": save_request.id, | "id": save_request.id, | ||||
"visit_type": save_request.visit_type, | "visit_type": save_request.visit_type, | ||||
"visit_status": save_request.visit_status, | "visit_status": save_request.visit_status, | ||||
"origin_url": save_request.origin_url, | "origin_url": save_request.origin_url, | ||||
"save_request_date": save_request.request_date.isoformat(), | "save_request_date": save_request.request_date.isoformat(), | ||||
"save_request_status": save_request.status, | "save_request_status": save_request.status, | ||||
"save_task_status": save_request.loading_task_status, | "save_task_status": save_request.loading_task_status, | ||||
"visit_date": visit_date.isoformat() if visit_date else None, | "visit_date": visit_date.isoformat() if visit_date else None, | ||||
Done Inline Actions: You should also add the visit_status to that dict. anlambert: You should also add the `visit_status` to that dict. | |||||
Done Inline Actions: Forget that comment, I did not see it was already there... anlambert: Forget that comment, I did not see it was already there... | |||||
} | } | ||||
def create_save_origin_request(visit_type: str, origin_url: str) -> Dict[str, Any]: | def create_save_origin_request(visit_type: str, origin_url: str) -> Dict[str, Any]: | ||||
""" | """ | ||||
Create a loading task to save a software origin into the archive. | Create a loading task to save a software origin into the archive. | ||||
This function aims to create a software origin loading task | This function aims to create a software origin loading task | ||||
▲ Show 20 Lines • Show All 423 Lines • Show Last 20 Lines |
I think this possibly happens because the tasks are not being updated in the save code now UI [1].
This should be addressed once [2] is fixed (soon).
(or something)
[1] T3278#63827
[2] T3280