Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from bisect import bisect_right | from bisect import bisect_right | ||||
from datetime import datetime, timezone, timedelta | from datetime import datetime, timezone, timedelta | ||||
from itertools import product | from itertools import product | ||||
import json | import json | ||||
▲ Show 20 Lines • Show All 133 Lines • ▼ Show 20 Lines | except ValidationError: | ||||
raise BadInputExc( | raise BadInputExc( | ||||
"The provided origin url (%s) is not valid!" % escape(origin_url) | "The provided origin url (%s) is not valid!" % escape(origin_url) | ||||
) | ) | ||||
def _get_visit_info_for_save_request(save_request): | def _get_visit_info_for_save_request(save_request): | ||||
visit_date = None | visit_date = None | ||||
visit_status = None | visit_status = None | ||||
time_now = datetime.now(tz=timezone.utc) | |||||
time_delta = time_now - save_request.request_date | |||||
# stop trying to find a visit date one month after save request submission | |||||
# as those requests to storage are expensive and associated loading task | |||||
# surely ended up with errors | |||||
if time_delta.days <= 30: | |||||
try: | try: | ||||
origin = {"url": save_request.origin_url} | origin = {"url": save_request.origin_url} | ||||
origin_info = service.lookup_origin(origin) | origin_info = service.lookup_origin(origin) | ||||
origin_visits = get_origin_visits(origin_info) | origin_visits = get_origin_visits(origin_info) | ||||
visit_dates = [parse_timestamp(v["date"]) for v in origin_visits] | visit_dates = [parse_timestamp(v["date"]) for v in origin_visits] | ||||
i = bisect_right(visit_dates, save_request.request_date) | i = bisect_right(visit_dates, save_request.request_date) | ||||
if i != len(visit_dates): | if i != len(visit_dates): | ||||
visit_date = visit_dates[i] | visit_date = visit_dates[i] | ||||
visit_status = origin_visits[i]["status"] | visit_status = origin_visits[i]["status"] | ||||
if origin_visits[i]["status"] == "ongoing": | if origin_visits[i]["status"] == "ongoing": | ||||
visit_date = None | visit_date = None | ||||
except Exception as exc: | except Exception as exc: | ||||
sentry_sdk.capture_exception(exc) | sentry_sdk.capture_exception(exc) | ||||
return visit_date, visit_status | return visit_date, visit_status | ||||
def _check_visit_update_status(save_request, save_task_status): | def _check_visit_update_status(save_request, save_task_status): | ||||
visit_date, visit_status = _get_visit_info_for_save_request(save_request) | visit_date, visit_status = _get_visit_info_for_save_request(save_request) | ||||
save_request.visit_date = visit_date | save_request.visit_date = visit_date | ||||
# visit has been performed, mark the saving task as succeed | # visit has been performed, mark the saving task as succeed | ||||
if visit_date and visit_status is not None: | if visit_date and visit_status is not None: | ||||
▲ Show 20 Lines • Show All 425 Lines • Show Last 20 Lines |