diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py index 6dc2201a..023c7aa8 100644 --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -42,43 +42,45 @@ def get_origin_visits(origin_info: OriginInfo) -> List[OriginVisitInfo]: cache_entry_id = "origin_visits_%s" % origin_url cache_entry = cache.get(cache_entry_id) + last_visit = 0 + origin_visits = [] + new_visits = [] + per_page = archive.MAX_LIMIT if cache_entry: + origin_visits = cache_entry last_visit = cache_entry[-1]["visit"] new_visits = list( - archive.lookup_origin_visits(origin_url, last_visit=last_visit) + archive.lookup_origin_visits( + origin_url, last_visit=last_visit, per_page=per_page + ) ) + last_visit += per_page if not new_visits: last_snp = archive.lookup_latest_origin_snapshot(origin_url) if not last_snp or last_snp["id"] == cache_entry[-1]["snapshot"]: return cache_entry - origin_visits = [] - - per_page = archive.MAX_LIMIT - last_visit = None + # get new visits that we did not retrieve yet + print(per_page) while 1: visits = list( archive.lookup_origin_visits( origin_url, last_visit=last_visit, per_page=per_page ) ) - origin_visits += visits + print(visits) + new_visits += visits if len(visits) < per_page: break - else: - if not last_visit: - last_visit = per_page - else: - last_visit += per_page + last_visit += per_page def _visit_sort_key(visit): ts = parse_iso8601_date_to_utc(visit["date"]).timestamp() return ts + (float(visit["visit"]) / 10e3) - origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) - + # cache entry is already sorted with oldest visits + origin_visits += sorted(new_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) - return origin_visits diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py index 5da3b07f..0f7e93d0 100644 --- a/swh/web/tests/common/test_origin_visits.py +++ b/swh/web/tests/common/test_origin_visits.py @@ -8,6 +8,8 @@ from datetime import timedelta from hypothesis import given import pytest +from django.core.cache import cache + from swh.model.hashutil import hash_to_hex from swh.model.model import OriginVisit, OriginVisitStatus from swh.storage.utils import now @@ -23,7 +25,7 @@ def test_get_origin_visits(mocker, snapshots): mock_archive.MAX_LIMIT = 2 def _lookup_origin_visits(*args, **kwargs): - if kwargs["last_visit"] is None: + if kwargs["last_visit"] == 0: return [ { "visit": 1, @@ -51,6 +53,9 @@ def test_get_origin_visits(mocker, snapshots): } ] + # ensure to reset django cache between hypothesis examples + cache.clear() + mock_archive.lookup_origin_visits.side_effect = _lookup_origin_visits origin_info = {