Page MenuHomeSoftware Heritage
Paste P1275

(An Untitled Masterwork)
ActivePublic

Authored by anlambert on Feb 3 2022, 6:07 PM.
diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py
index 6dc2201a..023c7aa8 100644
--- a/swh/web/common/origin_visits.py
+++ b/swh/web/common/origin_visits.py
@@ -42,43 +42,45 @@ def get_origin_visits(origin_info: OriginInfo) -> List[OriginVisitInfo]:
cache_entry_id = "origin_visits_%s" % origin_url
cache_entry = cache.get(cache_entry_id)
+ last_visit = 0
+ origin_visits = []
+ new_visits = []
+ per_page = archive.MAX_LIMIT
if cache_entry:
+ origin_visits = cache_entry
last_visit = cache_entry[-1]["visit"]
new_visits = list(
- archive.lookup_origin_visits(origin_url, last_visit=last_visit)
+ archive.lookup_origin_visits(
+ origin_url, last_visit=last_visit, per_page=per_page
+ )
)
+ last_visit += per_page
if not new_visits:
last_snp = archive.lookup_latest_origin_snapshot(origin_url)
if not last_snp or last_snp["id"] == cache_entry[-1]["snapshot"]:
return cache_entry
- origin_visits = []
-
- per_page = archive.MAX_LIMIT
- last_visit = None
+ # get new visits that we did not retrieve yet
+ print(per_page)
while 1:
visits = list(
archive.lookup_origin_visits(
origin_url, last_visit=last_visit, per_page=per_page
)
)
- origin_visits += visits
+ print(visits)
+ new_visits += visits
if len(visits) < per_page:
break
- else:
- if not last_visit:
- last_visit = per_page
- else:
- last_visit += per_page
+ last_visit += per_page
def _visit_sort_key(visit):
ts = parse_iso8601_date_to_utc(visit["date"]).timestamp()
return ts + (float(visit["visit"]) / 10e3)
- origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v))
-
+ # the cache entry is already sorted, oldest visits first
+ origin_visits += sorted(new_visits, key=lambda v: _visit_sort_key(v))
cache.set(cache_entry_id, origin_visits)
-
return origin_visits
diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py
index 5da3b07f..0f7e93d0 100644
--- a/swh/web/tests/common/test_origin_visits.py
+++ b/swh/web/tests/common/test_origin_visits.py
@@ -8,6 +8,8 @@ from datetime import timedelta
from hypothesis import given
import pytest
+from django.core.cache import cache
+
from swh.model.hashutil import hash_to_hex
from swh.model.model import OriginVisit, OriginVisitStatus
from swh.storage.utils import now
@@ -23,7 +25,7 @@ def test_get_origin_visits(mocker, snapshots):
mock_archive.MAX_LIMIT = 2
def _lookup_origin_visits(*args, **kwargs):
- if kwargs["last_visit"] is None:
+ if kwargs["last_visit"] == 0:
return [
{
"visit": 1,
@@ -51,6 +53,9 @@ def test_get_origin_visits(mocker, snapshots):
}
]
+ # make sure to reset the Django cache between hypothesis examples
+ cache.clear()
+
mock_archive.lookup_origin_visits.side_effect = _lookup_origin_visits
origin_info = {