Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_visits.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import math | import math | ||||
from typing import List, Optional, Union | |||||
from django.core.cache import cache | from django.core.cache import cache | ||||
from swh.web.common.exc import NotFoundExc | from swh.web.common.exc import NotFoundExc | ||||
from swh.web.common.typing import OriginInfo, OriginVisitInfo | |||||
from swh.web.common.utils import parse_timestamp | from swh.web.common.utils import parse_timestamp | ||||
def get_origin_visits(origin_info): | def get_origin_visits(origin_info: OriginInfo) -> List[OriginVisitInfo]: | ||||
"""Function that returns the list of visits for a swh origin. | """Function that returns the list of visits for a swh origin. | ||||
That list is put in cache in order to speed up the navigation | That list is put in cache in order to speed up the navigation | ||||
in the swh web browse ui. | in the swh web browse ui. | ||||
Args: | Args: | ||||
origin_info (dict): dict describing the origin to fetch visits from | origin_info: dict describing the origin to fetch visits from | ||||
Returns: | Returns: | ||||
list: A list of dict describing the origin visits with the | A list of dict describing the origin visits | ||||
following keys: | |||||
* **date**: UTC visit date in ISO format, | |||||
* **origin**: the origin url | |||||
* **status**: the visit status, either **full**, **partial** | |||||
or **ongoing** | |||||
* **visit**: the visit id | |||||
* **type**: the visit type | |||||
Raises: | Raises: | ||||
swh.web.common.exc.NotFoundExc: if the origin is not found | swh.web.common.exc.NotFoundExc: if the origin is not found | ||||
""" | """ | ||||
from swh.web.common import service | from swh.web.common import service | ||||
if "url" in origin_info: | if "url" in origin_info: | ||||
Show All 32 Lines | while 1: | ||||
last_visit = per_page | last_visit = per_page | ||||
else: | else: | ||||
last_visit += per_page | last_visit += per_page | ||||
def _visit_sort_key(visit): | def _visit_sort_key(visit): | ||||
ts = parse_timestamp(visit["date"]).timestamp() | ts = parse_timestamp(visit["date"]).timestamp() | ||||
return ts + (float(visit["visit"]) / 10e3) | return ts + (float(visit["visit"]) / 10e3) | ||||
for v in origin_visits: | |||||
if "metadata" in v: | |||||
del v["metadata"] | |||||
origin_visits = [dict(t) for t in set([tuple(d.items()) for d in origin_visits])] | |||||
origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) | origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) | ||||
cache.set(cache_entry_id, origin_visits) | cache.set(cache_entry_id, origin_visits) | ||||
return origin_visits | return origin_visits | ||||
def get_origin_visit(origin_info, visit_ts=None, visit_id=None, snapshot_id=None): | def get_origin_visit( | ||||
"""Function that returns information about a visit for | origin_info: OriginInfo, | ||||
a given origin. | visit_ts: Optional[Union[int, str]] = None, | ||||
visit_id: Optional[int] = None, | |||||
snapshot_id: Optional[str] = None, | |||||
) -> OriginVisitInfo: | |||||
"""Function that returns information about a visit for a given origin. | |||||
The visit is retrieved from a provided timestamp. | The visit is retrieved from a provided timestamp. | ||||
The closest visit to that timestamp is selected. | The closest visit to that timestamp is selected. | ||||
Args: | Args: | ||||
origin_info (dict): a dict filled with origin information | origin_info: a dict filled with origin information | ||||
visit_ts (int or str): an ISO date string or Unix timestamp to parse | visit_ts: an ISO date string or Unix timestamp to parse | ||||
Returns: | Returns: | ||||
A dict containing the visit info as described below:: | A dict containing the visit info. | ||||
{'origin': 'https://forge.softwareheritage.org/source/swh-web/', | |||||
'date': '2017-10-08T11:54:25.582463+00:00', | |||||
'metadata': {}, | |||||
'visit': 25, | |||||
'status': 'full'} | |||||
""" | """ | ||||
visits = get_origin_visits(origin_info) | visits = get_origin_visits(origin_info) | ||||
if not visits: | if not visits: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
("No visit associated to origin with" " url %s!" % origin_info["url"]) | ("No visit associated to origin with" " url %s!" % origin_info["url"]) | ||||
) | ) | ||||
if snapshot_id: | if snapshot_id: | ||||
visit = [v for v in visits if v["snapshot"] == snapshot_id] | visits = [v for v in visits if v["snapshot"] == snapshot_id] | ||||
if len(visit) == 0: | if len(visits) == 0: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
( | ( | ||||
"Visit for snapshot with id %s for origin with" | "Visit for snapshot with id %s for origin with" | ||||
" url %s not found!" % (snapshot_id, origin_info["url"]) | " url %s not found!" % (snapshot_id, origin_info["url"]) | ||||
) | ) | ||||
) | ) | ||||
return visit[0] | return visits[0] | ||||
if visit_id: | if visit_id: | ||||
visit = [v for v in visits if v["visit"] == int(visit_id)] | visits = [v for v in visits if v["visit"] == int(visit_id)] | ||||
if len(visit) == 0: | if len(visits) == 0: | ||||
raise NotFoundExc( | raise NotFoundExc( | ||||
( | ( | ||||
"Visit with id %s for origin with" | "Visit with id %s for origin with" | ||||
" url %s not found!" % (visit_id, origin_info["url"]) | " url %s not found!" % (visit_id, origin_info["url"]) | ||||
) | ) | ||||
) | ) | ||||
return visit[0] | return visits[0] | ||||
if not visit_ts: | if not visit_ts: | ||||
# returns the latest visit with a valid snapshot when no timestamp is provided | # returns the latest visit with a valid snapshot when no timestamp is provided | ||||
for v in reversed(visits): | for v in reversed(visits): | ||||
if v["snapshot"] is not None: | if v["snapshot"] is not None: | ||||
return v | return v | ||||
return visits[-1] | return visits[-1] | ||||
Show All 29 Lines |