Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_visits.py
Show All 12 Lines | |||||
from swh.web.common.utils import parse_timestamp | from swh.web.common.utils import parse_timestamp | ||||
def get_origin_visits(origin_info: OriginInfo) -> List[OriginVisitInfo]: | def get_origin_visits(origin_info: OriginInfo) -> List[OriginVisitInfo]: | ||||
"""Function that returns the list of visits for a swh origin. | """Function that returns the list of visits for a swh origin. | ||||
That list is put in cache in order to speedup the navigation | That list is put in cache in order to speedup the navigation | ||||
in the swh web browse ui. | in the swh web browse ui. | ||||
The returned visits are sorted according to their date in | |||||
ascending order. | |||||
Args: | Args: | ||||
origin_info: dict describing the origin to fetch visits from | origin_info: dict describing the origin to fetch visits from | ||||
Returns: | Returns: | ||||
A list of dict describing the origin visits | A list of dict describing the origin visits | ||||
Raises: | Raises: | ||||
swh.web.common.exc.NotFoundExc: if the origin is not found | swh.web.common.exc.NotFoundExc: if the origin is not found | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | |||||
def get_origin_visit( | def get_origin_visit( | ||||
origin_info: OriginInfo, | origin_info: OriginInfo, | ||||
visit_ts: Optional[Union[int, str]] = None, | visit_ts: Optional[Union[int, str]] = None, | ||||
visit_id: Optional[int] = None, | visit_id: Optional[int] = None, | ||||
snapshot_id: Optional[str] = None, | snapshot_id: Optional[str] = None, | ||||
) -> OriginVisitInfo: | ) -> OriginVisitInfo: | ||||
"""Function that returns information about a visit for a given origin. | """Function that returns information about a visit for a given origin. | ||||
The visit is retrieved from a provided timestamp. | |||||
The closest visit from that timestamp is selected. | If a timestamp is provided, the closest visit from that | ||||
timestamp is returned. | |||||
If a snapshot identifier is provided, the first visit with that snapshot | |||||
is returned. | |||||
If no search hints are provided, return the most recent full visit with | |||||
a valid snapshot or the most recent partial visit with a valid snapshot | |||||
otherwise. | |||||
Args: | Args: | ||||
origin_info: a dict filled with origin information | origin_info: a dict filled with origin information | ||||
visit_ts: an ISO date string or Unix timestamp to parse | visit_ts: an ISO date string or Unix timestamp to parse | ||||
snapshot_id: a snapshot identifier | |||||
Returns: | Returns: | ||||
A dict containing the visit info. | A dict containing the visit info. | ||||
Raises: | |||||
swh.web.common.exc.NotFoundExc: if no visit can be found | |||||
""" | """ | ||||
if not visit_ts and not visit_id and not snapshot_id: | |||||
from swh.web.common import service | |||||
# returns the latest full visit with a valid snapshot | |||||
visit = service.lookup_origin_visit_latest( | |||||
origin_info["url"], allowed_statuses=["full"], require_snapshot=True | |||||
) | |||||
if not visit: | |||||
# or the latest partial visit with a valid snapshot otherwise | |||||
visit = service.lookup_origin_visit_latest( | |||||
origin_info["url"], allowed_statuses=["partial"], require_snapshot=True | |||||
) | |||||
if visit: | |||||
return visit | |||||
else: | |||||
raise NotFoundExc( | |||||
f"No valid visit for origin with url {origin_info['url']} found!" | |||||
) | |||||
visits = get_origin_visits(origin_info) | visits = get_origin_visits(origin_info) | ||||
if not visits: | if not visits: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
("No visit associated to origin with" " url %s!" % origin_info["url"]) | f"No visits associated to origin with url {origin_info['url']}!" | ||||
) | ) | ||||
if snapshot_id: | if snapshot_id: | ||||
visits = [v for v in visits if v["snapshot"] == snapshot_id] | visits = [v for v in visits if v["snapshot"] == snapshot_id] | ||||
if len(visits) == 0: | if len(visits) == 0: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
( | ( | ||||
"Visit for snapshot with id %s for origin with" | "Visit for snapshot with id %s for origin with" | ||||
" url %s not found!" % (snapshot_id, origin_info["url"]) | " url %s not found!" % (snapshot_id, origin_info["url"]) | ||||
) | ) | ||||
) | ) | ||||
return visits[0] | return visits[0] | ||||
if visit_id: | if visit_id: | ||||
visits = [v for v in visits if v["visit"] == int(visit_id)] | visits = [v for v in visits if v["visit"] == int(visit_id)] | ||||
if len(visits) == 0: | if len(visits) == 0: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
( | ( | ||||
"Visit with id %s for origin with" | "Visit with id %s for origin with" | ||||
" url %s not found!" % (visit_id, origin_info["url"]) | " url %s not found!" % (visit_id, origin_info["url"]) | ||||
) | ) | ||||
) | ) | ||||
return visits[0] | return visits[0] | ||||
if not visit_ts: | if visit_ts: | ||||
# returns the latest visit with a valid snapshot when no timestamp is provided | |||||
for v in reversed(visits): | |||||
if v["snapshot"] is not None: | |||||
return v | |||||
return visits[-1] | |||||
target_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) | target_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) | ||||
# Find the visit with date closest to the target (in absolute value) | # Find the visit with date closest to the target (in absolute value) | ||||
ardumont: Having the snapshot-id set does not mean that it's necessarily valid currently (unfortunately)… | |||||
(abs_time_delta, visit_idx) = min( | (abs_time_delta, visit_idx) = min( | ||||
( | ( | ||||
(math.floor(parse_timestamp(visit["date"]).timestamp()), i) | (math.floor(parse_timestamp(visit["date"]).timestamp()), i) | ||||
for (i, visit) in enumerate(visits) | for (i, visit) in enumerate(visits) | ||||
), | ), | ||||
key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts), | key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts), | ||||
) | ) | ||||
if visit_idx is not None: | if visit_idx is not None: | ||||
visit = visits[visit_idx] | visit = visits[visit_idx] | ||||
# If multiple visits have the same date, select the one with | # If multiple visits have the same date, select the one with | ||||
# the largest id. | # the largest id. | ||||
while ( | while ( | ||||
visit_idx < len(visits) - 1 | visit_idx < len(visits) - 1 | ||||
and visit["date"] == visits[visit_idx + 1]["date"] | and visit["date"] == visits[visit_idx + 1]["date"] | ||||
): | ): | ||||
visit_idx = visit_idx + 1 | visit_idx = visit_idx + 1 | ||||
visit = visits[visit_idx] | visit = visits[visit_idx] | ||||
return visit | return visit | ||||
else: | else: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
( | ( | ||||
"Visit with timestamp %s for origin with " | "Visit with timestamp %s for origin with " | ||||
"url %s not found!" % (visit_ts, origin_info["url"]) | "url %s not found!" % (visit_ts, origin_info["url"]) | ||||
) | ) | ||||
) | ) | ||||
return visits[-1] |
Having the snapshot id set on a visit does not necessarily mean that the snapshot is currently valid (unfortunately) [1]
[1] D3322