diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py index 7de92729..5346a5bd 100644 --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -1,175 +1,175 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import math from django.core.cache import cache from swh.web.common.exc import NotFoundExc from swh.web.common.utils import parse_timestamp def get_origin_visits(origin_info): """Function that returns the list of visits for a swh origin. That list is put in cache in order to speedup the navigation in the swh web browse ui. Args: origin_info (dict): dict describing the origin to fetch visits from Returns: list: A list of dict describing the origin visits with the following keys: * **date**: UTC visit date in ISO format, * **origin**: the origin url * **status**: the visit status, either **full**, **partial** or **ongoing** * **visit**: the visit id * **type**: the visit type Raises: swh.web.common.exc.NotFoundExc: if the origin is not found """ from swh.web.common import service if "url" in origin_info: origin_url = origin_info["url"] else: origin_url = service.lookup_origin(origin_info)["url"] cache_entry_id = "origin_visits_%s" % origin_url cache_entry = cache.get(cache_entry_id) if cache_entry: last_visit = cache_entry[-1]["visit"] new_visits = list( service.lookup_origin_visits(origin_url, last_visit=last_visit) ) if not new_visits: last_snp = service.lookup_latest_origin_snapshot(origin_url) if not last_snp or last_snp["id"] == cache_entry[-1]["snapshot"]: return cache_entry origin_visits = [] per_page = service.MAX_LIMIT last_visit = None while 1: visits = list( service.lookup_origin_visits( origin_url, last_visit=last_visit, per_page=per_page ) ) origin_visits += visits if len(visits) < per_page: break else: if not last_visit: last_visit = per_page else: last_visit += per_page def _visit_sort_key(visit): ts = parse_timestamp(visit["date"]).timestamp() return ts + (float(visit["visit"]) / 10e3) for v in origin_visits: if "metadata" in v: del v["metadata"] origin_visits = [dict(t) for t in set([tuple(d.items()) for d in origin_visits])] origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) return origin_visits def get_origin_visit(origin_info, visit_ts=None, visit_id=None, snapshot_id=None): """Function that returns information about a visit for a given origin. The visit is retrieved from a provided timestamp. The closest visit from that timestamp is selected. Args: origin_info (dict): a dict filled with origin information visit_ts (int or str): an ISO date string or Unix timestamp to parse Returns: A dict containing the visit info as described below:: {'origin': 'https://forge.softwareheritage.org/source/swh-web/', 'date': '2017-10-08T11:54:25.582463+00:00', 'metadata': {}, 'visit': 25, 'status': 'full'} """ visits = get_origin_visits(origin_info) if not visits: raise NotFoundExc( ("No visit associated to origin with" " url %s!" % origin_info["url"]) ) if snapshot_id: visit = [v for v in visits if v["snapshot"] == snapshot_id] if len(visit) == 0: raise NotFoundExc( ( "Visit for snapshot with id %s for origin with" " url %s not found!" % (snapshot_id, origin_info["url"]) ) ) return visit[0] if visit_id: visit = [v for v in visits if v["visit"] == int(visit_id)] if len(visit) == 0: raise NotFoundExc( ( "Visit with id %s for origin with" " url %s not found!" % (visit_id, origin_info["url"]) ) ) return visit[0] if not visit_ts: - # returns the latest full visit when no timestamp is provided + # returns the latest visit with a valid snapshot when no timestamp is provided for v in reversed(visits): - if v["status"] == "full": + if v["snapshot"] is not None: return v return visits[-1] target_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) # Find the visit with date closest to the target (in absolute value) (abs_time_delta, visit_idx) = min( ( (math.floor(parse_timestamp(visit["date"]).timestamp()), i) for (i, visit) in enumerate(visits) ), key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts), ) if visit_idx is not None: visit = visits[visit_idx] # If multiple visits have the same date, select the one with # the largest id. while ( visit_idx < len(visits) - 1 and visit["date"] == visits[visit_idx + 1]["date"] ): visit_idx = visit_idx + 1 visit = visits[visit_idx] return visit else: raise NotFoundExc( ( "Visit with timestamp %s for origin with " "url %s not found!" % (visit_ts, origin_info["url"]) ) ) diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py index 73b734d9..7d8d059c 100644 --- a/swh/web/tests/common/test_origin_visits.py +++ b/swh/web/tests/common/test_origin_visits.py @@ -1,110 +1,196 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from hypothesis import given + import pytest +from swh.model.hashutil import hash_to_hex from swh.web.common.exc import NotFoundExc from swh.web.common.origin_visits import get_origin_visits, get_origin_visit +from swh.web.tests.strategies import new_snapshots -def test_get_origin_visits(mocker): +@given(new_snapshots(3)) +def test_get_origin_visits(mocker, snapshots): mock_service = mocker.patch("swh.web.common.service") mock_service.MAX_LIMIT = 2 def _lookup_origin_visits(*args, **kwargs): if kwargs["last_visit"] is None: return [ - {"visit": 1, "date": "2017-05-06T00:59:10+00:00", "metadata": {}}, - {"visit": 2, "date": "2017-08-06T00:59:10+00:00", "metadata": {}}, + { + "visit": 1, + "date": "2017-05-06T00:59:10+00:00", + "status": "full", + "snapshot": hash_to_hex(snapshots[0].id), + "type": "git", + }, + { + "visit": 2, + "date": "2017-08-06T00:59:10+00:00", + "status": "full", + "snapshot": hash_to_hex(snapshots[1].id), + "type": "git", + }, ] else: - return [{"visit": 3, "date": "2017-09-06T00:59:10+00:00", "metadata": {}}] + return [ + { + "visit": 3, + "date": "2017-09-06T00:59:10+00:00", + "status": "full", + "snapshot": hash_to_hex(snapshots[2].id), + "type": "git", + } + ] mock_service.lookup_origin_visits.side_effect = _lookup_origin_visits origin_info = { - "id": 1, "url": "https://github.com/foo/bar", } origin_visits = get_origin_visits(origin_info) assert len(origin_visits) == 3 -def test_get_origin_visit(mocker): +@given(new_snapshots(5)) +def test_get_origin_visit(mocker, snapshots): mock_origin_visits = mocker.patch("swh.web.common.origin_visits.get_origin_visits") origin_info = { "url": "https://github.com/foo/bar", } visits = [ { "status": "full", "date": "2015-07-09T21:09:24+00:00", "visit": 1, "origin": "https://github.com/foo/bar", "type": "git", + "snapshot": hash_to_hex(snapshots[0].id), }, { "status": "full", "date": "2016-02-23T18:05:23.312045+00:00", "visit": 2, "origin": "https://github.com/foo/bar", "type": "git", + "snapshot": hash_to_hex(snapshots[1].id), }, { "status": "full", "date": "2016-03-28T01:35:06.554111+00:00", "visit": 3, "origin": "https://github.com/foo/bar", "type": "git", + "snapshot": hash_to_hex(snapshots[2].id), }, { "status": "full", "date": "2016-06-18T01:22:24.808485+00:00", "visit": 4, "origin": "https://github.com/foo/bar", "type": "git", + "snapshot": hash_to_hex(snapshots[3].id), }, { "status": "full", "date": "2016-08-14T12:10:00.536702+00:00", "visit": 5, "origin": "https://github.com/foo/bar", "type": "git", + "snapshot": hash_to_hex(snapshots[4].id), }, ] mock_origin_visits.return_value = visits visit_id = 12 with pytest.raises(NotFoundExc) as e: visit = get_origin_visit(origin_info, visit_id=visit_id) assert e.match("Visit with id %s" % visit_id) assert e.match("url %s" % origin_info["url"]) visit = get_origin_visit(origin_info, visit_id=2) assert visit == visits[1] visit = get_origin_visit(origin_info, visit_ts="2016-02-23T18:05:23.312045+00:00") assert visit == visits[1] visit = get_origin_visit(origin_info, visit_ts="2016-02-20") assert visit == visits[1] visit = get_origin_visit(origin_info, visit_ts="2016-06-18T01:22") assert visit == visits[3] visit = get_origin_visit(origin_info, visit_ts="2016-06-18 01:22") assert visit == visits[3] visit = get_origin_visit(origin_info, visit_ts=1466208000) assert visit == visits[3] visit = get_origin_visit(origin_info, visit_ts="2014-01-01") assert visit == visits[0] visit = get_origin_visit(origin_info, visit_ts="2018-01-01") assert visit == visits[-1] + + +def test_get_origin_visit_latest_valid_snapshot(mocker): + mock_origin_visits = mocker.patch("swh.web.common.origin_visits.get_origin_visits") + origin_info = { + "url": "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json", + } + visits = [ + { + "date": "2020-04-15T12:42:52.936520+00:00", + "origin": origin_info["url"], + "snapshot": "d820451681c74eec63693b6ea4e4b8417c76bb7a", + "status": "partial", + "type": "nixguix", + "visit": 16, + }, + { + "date": "2020-04-17T17:25:13.738789+00:00", + "origin": origin_info["url"], + "snapshot": "d20627c1ae2b5e553e8adcf625f37e37cc5190dd", + "status": "partial", + "type": "nixguix", + "visit": 17, + }, + { + "date": "2020-04-19T19:02:42.906079+00:00", + "origin": origin_info["url"], + "snapshot": None, + "status": "partial", + "type": "nixguix", + "visit": 18, + }, + { + "date": "2020-04-20T12:43:41.120422+00:00", + "origin": origin_info["url"], + "snapshot": None, + "status": "partial", + "type": "nixguix", + "visit": 19, + }, + { + "date": "2020-04-20T12:46:40.255418+00:00", + "origin": origin_info["url"], + "snapshot": None, + "status": "partial", + "type": "nixguix", + "visit": 20, + }, + ] + + mock_origin_visits.return_value = visits + + visit = get_origin_visit(origin_info) + + assert visit["snapshot"] is not None + assert visit["visit"] == 17