diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -42,40 +42,43 @@ cache_entry_id = "origin_visits_%s" % origin_url cache_entry = cache.get(cache_entry_id) + last_visit = 0 + origin_visits = [] + new_visits = [] + per_page = archive.MAX_LIMIT if cache_entry: + origin_visits = cache_entry last_visit = cache_entry[-1]["visit"] new_visits = list( - archive.lookup_origin_visits(origin_url, last_visit=last_visit) + archive.lookup_origin_visits( + origin_url, last_visit=last_visit, per_page=per_page + ) ) if not new_visits: last_snp = archive.lookup_latest_origin_snapshot(origin_url) if not last_snp or last_snp["id"] == cache_entry[-1]["snapshot"]: return cache_entry - origin_visits = [] + last_visit += len(new_visits) - per_page = archive.MAX_LIMIT - last_visit = None + # get new visits that we did not retrieve yet while 1: visits = list( archive.lookup_origin_visits( origin_url, last_visit=last_visit, per_page=per_page ) ) - origin_visits += visits + new_visits += visits if len(visits) < per_page: break - else: - if not last_visit: - last_visit = per_page - else: - last_visit += per_page + last_visit += per_page def _visit_sort_key(visit): ts = parse_iso8601_date_to_utc(visit["date"]).timestamp() return ts + (float(visit["visit"]) / 10e3) - origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) + # cache entry is already sorted with oldest visits + origin_visits += sorted(new_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py --- a/swh/web/tests/common/test_origin_visits.py +++ b/swh/web/tests/common/test_origin_visits.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,9 +6,11 @@ from datetime import timedelta from hypothesis import given -import iso8601 import pytest +from django.core.cache import cache + +from swh.model.hashutil import hash_to_hex from swh.model.model import OriginVisit, OriginVisitStatus from swh.storage.utils import now from swh.web.common.exc import NotFoundExc @@ -17,97 +19,131 @@ from swh.web.tests.strategies import new_origin, new_snapshots -@given(new_origin(), new_snapshots(3)) -def test_get_origin_visits(mocker, archive_data, new_origin, new_snapshots): - from swh.web.common import archive - - mocker.patch.object(archive, "MAX_LIMIT", 2) - - archive_data.origin_add([new_origin]) - archive_data.snapshot_add(new_snapshots) - for i, snapshot in enumerate(new_snapshots): - visit_date = now() + timedelta(days=i * 10) - visit = archive_data.origin_visit_add( - [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] - )[0] - visit_status = OriginVisitStatus( - origin=new_origin.url, - visit=visit.visit, - date=visit_date + timedelta(minutes=5), - status="full", - snapshot=snapshot.id, - ) - archive_data.origin_visit_status_add([visit_status]) - - origin_visits = get_origin_visits(new_origin.to_dict()) - - assert len(origin_visits) == len(new_snapshots) - - -@given(new_origin(), new_snapshots(5)) -def test_get_origin_visit(archive_data, new_origin, new_snapshots): - - archive_data.origin_add([new_origin]) - archive_data.snapshot_add(new_snapshots) - visits = [] - for i, visit_date in enumerate( - map( - iso8601.parse_date, - [ - "2015-07-09T21:09:24+00:00", - "2016-02-23T18:05:23.312045+00:00", - "2016-03-28T01:35:06.554111+00:00", - "2016-06-18T01:22:24.808485+00:00", - "2016-08-14T12:10:00.536702+00:00", - ], - ) - ): - visit = archive_data.origin_visit_add( - [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] - )[0] - visits.append(visit) - visit_status = OriginVisitStatus( - origin=new_origin.url, - visit=visit.visit, - date=visit_date + timedelta(minutes=5), - status="full", - snapshot=new_snapshots[i].id, - ) - archive_data.origin_visit_status_add([visit_status]) - - origin_info = new_origin.to_dict() - - visit_id = visits[-1].visit + 1 +@given(new_snapshots(3)) +def test_get_origin_visits(mocker, snapshots): + mock_archive = mocker.patch("swh.web.common.archive") + mock_archive.MAX_LIMIT = 2 + + def _lookup_origin_visits(*args, **kwargs): + if kwargs["last_visit"] == 0: + return [ + { + "visit": 1, + "date": "2017-05-06T00:59:10+00:00", + "status": "full", + "snapshot": hash_to_hex(snapshots[0].id), + "type": "git", + }, + { + "visit": 2, + "date": "2017-08-06T00:59:10+00:00", + "status": "full", + "snapshot": hash_to_hex(snapshots[1].id), + "type": "git", + }, + ] + else: + return [ + { + "visit": 3, + "date": "2017-09-06T00:59:10+00:00", + "status": "full", + "snapshot": hash_to_hex(snapshots[2].id), + "type": "git", + } + ] + + # ensure to reset django cache between hypothesis examples + cache.clear() + + mock_archive.lookup_origin_visits.side_effect = _lookup_origin_visits + + origin_info = { + "url": "https://github.com/foo/bar", + } + + origin_visits = get_origin_visits(origin_info) + + assert len(origin_visits) == 3 + + +@given(new_snapshots(5)) +def test_get_origin_visit(mocker, snapshots): + mock_origin_visits = mocker.patch("swh.web.common.origin_visits.get_origin_visits") + origin_info = { + "url": "https://github.com/foo/bar", + } + visits = [ + { + "status": "full", + "date": "2015-07-09T21:09:24+00:00", + "visit": 1, + "origin": "https://github.com/foo/bar", + "type": "git", + "snapshot": hash_to_hex(snapshots[0].id), + }, + { + "status": "full", + "date": "2016-02-23T18:05:23.312045+00:00", + "visit": 2, + "origin": "https://github.com/foo/bar", + "type": "git", + "snapshot": hash_to_hex(snapshots[1].id), + }, + { + "status": "full", + "date": "2016-03-28T01:35:06.554111+00:00", + "visit": 3, + "origin": "https://github.com/foo/bar", + "type": "git", + "snapshot": hash_to_hex(snapshots[2].id), + }, + { + "status": "full", + "date": "2016-06-18T01:22:24.808485+00:00", + "visit": 4, + "origin": "https://github.com/foo/bar", + "type": "git", + "snapshot": hash_to_hex(snapshots[3].id), + }, + { + "status": "full", + "date": "2016-08-14T12:10:00.536702+00:00", + "visit": 5, + "origin": "https://github.com/foo/bar", + "type": "git", + "snapshot": hash_to_hex(snapshots[4].id), + }, + ] + mock_origin_visits.return_value = visits + + visit_id = 12 with pytest.raises(NotFoundExc) as e: visit = get_origin_visit(origin_info, visit_id=visit_id) assert e.match("Visit with id %s" % visit_id) assert e.match("url %s" % origin_info["url"]) - visit_id = visits[1].visit - visit = get_origin_visit(origin_info, visit_id=visit_id) - assert visit == archive_data.origin_visit_get_by(new_origin.url, visit_id=visit_id) + visit = get_origin_visit(origin_info, visit_id=2) + assert visit == visits[1] visit = get_origin_visit(origin_info, visit_ts="2016-02-23T18:05:23.312045+00:00") - assert visit == archive_data.origin_visit_get_by(new_origin.url, visit_id=visit_id) + assert visit == visits[1] visit = get_origin_visit(origin_info, visit_ts="2016-02-20") - assert visit == archive_data.origin_visit_get_by(new_origin.url, visit_id=visit_id) + assert visit == visits[1] - visit_id = visits[3].visit visit = get_origin_visit(origin_info, visit_ts="2016-06-18T01:22") - assert visit == archive_data.origin_visit_get_by(new_origin.url, visit_id=visit_id) + assert visit == visits[3] visit = get_origin_visit(origin_info, visit_ts="2016-06-18 01:22") - assert visit == archive_data.origin_visit_get_by(new_origin.url, visit_id=visit_id) + assert visit == visits[3] - visit_id = visits[0].visit visit = get_origin_visit(origin_info, visit_ts="2014-01-01") - assert visit == archive_data.origin_visit_get_by(new_origin.url, visit_id=visit_id) + assert visit == visits[0] - visit_id = visits[-1].visit visit = get_origin_visit(origin_info, visit_ts="2018-01-01") - assert visit == archive_data.origin_visit_get_by(new_origin.url, visit_id=visit_id) + assert visit == visits[-1] @given(new_origin(), new_snapshots(6))