diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -42,40 +42,43 @@ cache_entry_id = "origin_visits_%s" % origin_url cache_entry = cache.get(cache_entry_id) + last_visit = 0 + origin_visits = [] + new_visits = [] + per_page = archive.MAX_LIMIT if cache_entry: + origin_visits = cache_entry last_visit = cache_entry[-1]["visit"] new_visits = list( - archive.lookup_origin_visits(origin_url, last_visit=last_visit) + archive.lookup_origin_visits( + origin_url, last_visit=last_visit, per_page=per_page + ) ) if not new_visits: last_snp = archive.lookup_latest_origin_snapshot(origin_url) if not last_snp or last_snp["id"] == cache_entry[-1]["snapshot"]: return cache_entry - origin_visits = [] + last_visit += len(new_visits) - per_page = archive.MAX_LIMIT - last_visit = None + # get new visits that we did not retrieve yet while 1: visits = list( archive.lookup_origin_visits( origin_url, last_visit=last_visit, per_page=per_page ) ) - origin_visits += visits + new_visits += visits if len(visits) < per_page: break - else: - if not last_visit: - last_visit = per_page - else: - last_visit += per_page + last_visit += per_page def _visit_sort_key(visit): ts = parse_iso8601_date_to_utc(visit["date"]).timestamp() return ts + (float(visit["visit"]) / 10e3) - origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) + # cache entry is already sorted with oldest visits + origin_visits += sorted(new_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py --- a/swh/web/tests/common/test_origin_visits.py +++ b/swh/web/tests/common/test_origin_visits.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -8,6 +8,8 @@ from hypothesis import given import pytest +from django.core.cache import cache + from swh.model.hashutil import hash_to_hex from swh.model.model import OriginVisit, OriginVisitStatus from swh.storage.utils import now @@ -23,7 +25,7 @@ mock_archive.MAX_LIMIT = 2 def _lookup_origin_visits(*args, **kwargs): - if kwargs["last_visit"] is None: + if kwargs["last_visit"] == 0: return [ { "visit": 1, @@ -51,6 +53,9 @@ } ] + # ensure to reset django cache between hypothesis examples + cache.clear() + mock_archive.lookup_origin_visits.side_effect = _lookup_origin_visits origin_info = {