diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py
--- a/swh/web/common/archive.py
+++ b/swh/web/common/archive.py
@@ -2,8 +2,8 @@
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
-
 from collections import defaultdict
+import datetime
 import itertools
 import os
 import re
@@ -1066,6 +1066,31 @@
     return converters.from_origin_visit({**visit_status.to_dict(), "type": visit.type})
 
 
+def origin_visit_status_find_by_date(
+    origin_url: str, visit_date: datetime.datetime
+) -> Optional[OriginVisitInfo]:
+    """Retrieve the origin visit status whose date is closest to the provided timestamp.
+
+    Args:
+        origin_url: origin concerned by the visit
+        visit_date: provided timestamp
+
+    Returns:
+        The dict origin_visit_status concerned if any, None when no visit or
+        no visit status could be found.
+
+    """
+    visit = storage.origin_visit_find_by_date(origin_url, visit_date)
+    if not visit:
+        return None
+    # OriginVisit exposes its identifier through the ``visit`` attribute
+    visit_status = storage.origin_visit_status_get_latest(origin_url, visit.visit)
+    if not visit_status:
+        # no status recorded for that visit, nothing to convert
+        return None
+    return converters.from_origin_visit({**visit_status.to_dict(), "type": visit.type})
+
+
 def lookup_snapshot_sizes(
     snapshot_id: str, branch_name_exclude_prefix: Optional[str] = "refs/pull/"
 ) -> Dict[str, int]:
diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py
--- a/swh/web/common/origin_save.py
+++ b/swh/web/common/origin_save.py
@@ -1,9 +1,8 @@
-# Copyright (C) 2018-2021 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from bisect import bisect_right
 from datetime import datetime, timedelta, timezone
 from functools import lru_cache
 from itertools import product
@@ -39,12 +38,7 @@
     SaveOriginRequest,
     SaveUnauthorizedOrigin,
 )
-from swh.web.common.origin_visits import get_origin_visits -from swh.web.common.typing import ( - OriginExistenceCheckInfo, - OriginInfo, - SaveOriginRequestInfo, -) +from swh.web.common.typing import OriginExistenceCheckInfo, SaveOriginRequestInfo from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc from swh.web.config import get_config, scheduler @@ -285,16 +279,11 @@ # as those requests to storage are expensive and associated loading task # surely ended up with errors if time_delta.days <= MAX_THRESHOLD_DAYS: - try: - origin_info = archive.lookup_origin(OriginInfo(url=save_request.origin_url)) - origin_visits = get_origin_visits(origin_info) - visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits] - i = bisect_right(visit_dates, save_request.request_date) - if i != len(visit_dates): - visit_date = visit_dates[i] - visit_status = origin_visits[i]["status"] - except Exception as exc: - sentry_sdk.capture_exception(exc) + origin = save_request.origin_url + ovs = archive.origin_visit_status_find_by_date(origin, time_now) + if ovs: + visit_date = parse_iso8601_date_to_utc(ovs["date"]) + visit_status = ovs["status"] return visit_date, visit_status