diff --git a/swh/web/misc/coverage.py b/swh/web/misc/coverage.py
--- a/swh/web/misc/coverage.py
+++ b/swh/web/misc/coverage.py
@@ -4,13 +4,15 @@
 # See top-level LICENSE file for more information
 
 from collections import Counter, defaultdict
-from functools import lru_cache
-from typing import Dict, List, Tuple
+from typing import Any, Dict, List, Tuple
 from urllib.parse import urlparse
 
 import sentry_sdk
 
 from django.conf.urls import url
+from django.core.cache import cache
+from django.http.request import HttpRequest
+from django.http.response import HttpResponse
 from django.shortcuts import render
 from django.views.decorators.cache import never_cache
 from django.views.decorators.clickjacking import xframe_options_exempt
@@ -18,7 +20,7 @@
 from swh.scheduler.model import SchedulerMetrics
 from swh.web.common import archive
 from swh.web.common.origin_save import get_savable_visit_types
-from swh.web.common.utils import get_deposits_list, reverse
+from swh.web.common.utils import get_deposits_list, is_swh_web_production, reverse
 from swh.web.config import scheduler
 
 _swh_arch_overview_doc = (
@@ -33,7 +35,7 @@
 #
 # TODO: Store that list in a database table somewhere (swh-scheduler, swh-storage ?)
 # and retrieve it dynamically
-listed_origins = {
+listed_origins: Dict[str, Any] = {
     "info": (
         "These software origins get continuously discovered and archived using "
         f'the <a href="{_swh_arch_overview_doc}#listers">listers</a> '
@@ -230,24 +232,31 @@
-@lru_cache()
-def _get_listers_metrics() -> Dict[str, List[Tuple[str, SchedulerMetrics]]]:
+
+def _get_listers_metrics(
+    cache_metrics: bool = False,
+) -> Dict[str, List[Tuple[str, SchedulerMetrics]]]:
     """Returns scheduler metrics in the following mapping:
         Dict[lister_name, List[Tuple[instance_name, SchedulerMetrics]]]
     as a lister instance has one SchedulerMetrics object per visit type.
     """
-    listers_metrics = defaultdict(list)
-    try:
-        listers = scheduler().get_listers()
-        scheduler_metrics = scheduler().get_metrics()
-        for lister in listers:
-            for metrics in filter(
-                lambda m: m.lister_id == lister.id, scheduler_metrics
-            ):
-                listers_metrics[lister.name].append((lister.instance_name, metrics))
-    except Exception as e:
-        sentry_sdk.capture_exception(e)
+    cache_key = "lister_metrics"
+    listers_metrics = cache.get(cache_key, {})
+    if not listers_metrics:
+        listers_metrics = defaultdict(list)
+        try:
+            listers = scheduler().get_listers()
+            scheduler_metrics = scheduler().get_metrics()
+            for lister in listers:
+                for metrics in filter(
+                    lambda m: m.lister_id == lister.id, scheduler_metrics
+                ):
+                    listers_metrics[lister.name].append((lister.instance_name, metrics))
+            if cache_metrics:
+                cache.set(cache_key, listers_metrics, timeout=_cache_timeout)
+        except Exception as e:
+            sentry_sdk.capture_exception(e)
+
     return listers_metrics
 
 
-@lru_cache()
-def _get_deposits_netloc_counts() -> Counter:
+def _get_deposits_netloc_counts(cache_counts: bool = False) -> Counter:
     """Return deposit counts per origin url network location.
""" @@ -261,30 +271,42 @@ netloc += "/" + parsed_url.path.split("/")[1] return netloc - netlocs = [] - try: - deposits = get_deposits_list() - netlocs = [ - _process_origin_url(d["origin_url"]) - for d in deposits - if d["status"] == "done" - ] - except Exception as e: - sentry_sdk.capture_exception(e) - return Counter(netlocs) - - -@lru_cache() -def _get_nixguix_origins_count(origin_url: str) -> int: + cache_key = "deposits_netloc_counts" + deposits_netloc_counts = cache.get(cache_key, Counter()) + if not deposits_netloc_counts: + netlocs = [] + try: + deposits = get_deposits_list() + netlocs = [ + _process_origin_url(d["origin_url"]) + for d in deposits + if d["status"] == "done" + ] + deposits_netloc_counts = Counter(netlocs) + if cache_counts: + cache.set(cache_key, deposits_netloc_counts, timeout=_cache_timeout) + except Exception as e: + sentry_sdk.capture_exception(e) + + return deposits_netloc_counts + + +def _get_nixguix_origins_count(origin_url: str, cache_count: bool = False) -> int: """Returns number of archived tarballs for NixOS, aka the number of branches in a dedicated origin in the archive. """ - snapshot = archive.lookup_latest_origin_snapshot(origin_url) - if snapshot: - snapshot_sizes = archive.lookup_snapshot_sizes(snapshot["id"]) - return snapshot_sizes["release"] - else: - return 0 + cache_key = f"nixguix_origins_count_{origin_url}" + nixguix_origins_count = cache.get(cache_key, 0) + if not nixguix_origins_count: + snapshot = archive.lookup_latest_origin_snapshot(origin_url) + if snapshot: + snapshot_sizes = archive.lookup_snapshot_sizes(snapshot["id"]) + nixguix_origins_count = snapshot_sizes["release"] + else: + nixguix_origins_count = 0 + if cache_count: + cache.set(cache_key, nixguix_origins_count, timeout=_cache_timeout) + return nixguix_origins_count def _search_url(query: str, visit_type: str) -> str: @@ -301,8 +323,9 @@ @xframe_options_exempt @never_cache -def _swh_coverage(request): - listers_metrics = _get_listers_metrics() +def _swh_coverage(request: HttpRequest) -> HttpResponse: + use_cache = is_swh_web_production(request) + listers_metrics = _get_listers_metrics(use_cache) for origins in listed_origins["origins"]: origins["instances"] = {} origins_type = origins["type"] @@ -310,7 +333,8 @@ # special processing for nixos/guix origins as there is no # scheduler metrics for those if origins_type in ("nixos", "guix"): - count = _get_nixguix_origins_count(origins["search_pattern"]) + count = _get_nixguix_origins_count(origins["search_pattern"], use_cache) + origins["count"] = f"{count:,}" if count else "" origins["instances"][origins_type] = {"nixguix": {"count": count}} @@ -371,7 +395,8 @@ origins["search_pattern"], visit_type ) - deposits_counts = _get_deposits_netloc_counts() + deposits_counts = _get_deposits_netloc_counts(use_cache) + for origins in deposited_origins["origins"]: if origins["search_pattern"] in deposits_counts: origins["count"] = f"{deposits_counts[origins['search_pattern']]:,}" diff --git a/swh/web/tests/misc/test_coverage.py b/swh/web/tests/misc/test_coverage.py --- a/swh/web/tests/misc/test_coverage.py +++ b/swh/web/tests/misc/test_coverage.py @@ -9,28 +9,15 @@ from random import randint import uuid -import pytest - from django.conf import settings from django.utils.html import escape from swh.scheduler.model import LastVisitStatus, ListedOrigin, OriginVisitStats from swh.web.common.utils import reverse -from swh.web.misc.coverage import ( - _get_deposits_netloc_counts, - _get_listers_metrics, - deposited_origins, - legacy_origins, - 
-    listed_origins,
-)
+from swh.web.config import SWH_WEB_SERVER_NAME
+from swh.web.misc.coverage import deposited_origins, legacy_origins, listed_origins
 from swh.web.tests.django_asserts import assert_contains
-from swh.web.tests.utils import check_html_get_response
-
-
-@pytest.fixture(autouse=True)
-def clear_lru_caches():
-    _get_listers_metrics.cache_clear()
-    _get_deposits_netloc_counts.cache_clear()
+from swh.web.tests.utils import check_html_get_response, check_http_get_response
 
 
 def test_coverage_view_no_metrics(client, swh_scheduler):
@@ -50,9 +37,12 @@
     that will be consumed by the archive coverage view, then check the HTML page
     gets rendered without errors.
     """
-    mocker.patch(
-        "swh.web.misc.coverage._get_nixguix_origins_count"
-    ).return_value = 30095
+
+    # mock calls to get nixguix origin counts
+    mock_archive = mocker.patch("swh.web.misc.coverage.archive")
+    mock_archive.lookup_latest_origin_snapshot.return_value = {"id": "some-snapshot"}
+    mock_archive.lookup_snapshot_sizes.return_value = {"release": 30095}
+
     listers = []
     visit_types = ["git", "hg", "svn", "bzr", "svn"]
     for origins in listed_origins["origins"]:
@@ -134,3 +124,8 @@
 
     for visit_type in visit_types:
         assert_contains(resp, f"{visit_type}")
+
+    # check request as in production with cache enabled
+    check_http_get_response(
+        client, url, status_code=200, server_name=SWH_WEB_SERVER_NAME
+    )
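
Note on the caching pattern: the three helpers in this patch all follow the same get-or-compute flow around Django's low-level cache API, with the `cache.set()` call gated by a flag so that only production requests populate the cache. A minimal standalone sketch of that flow follows; the `get_or_compute` helper is illustrative, and the one-hour `_cache_timeout` is an assumed placeholder, since the patch references a module-level `_cache_timeout` whose definition does not appear in the hunks reproduced above:

```python
from typing import Any, Callable

from django.core.cache import cache

# Assumed placeholder: the patch uses a module-level _cache_timeout
# whose actual value is defined elsewhere in coverage.py.
_cache_timeout = 60 * 60


def get_or_compute(cache_key: str, compute: Callable[[], Any], use_cache: bool) -> Any:
    # Try the cache first; recompute on a miss.
    value = cache.get(cache_key)
    if not value:
        value = compute()
        # Populate the cache only when requested (i.e. in production),
        # so tests and development instances always recompute.
        if use_cache:
            cache.set(cache_key, value, timeout=_cache_timeout)
    return value
```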
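
The tests exercise both paths: `check_html_get_response` hits the view with the default test server name (cache disabled), while the new `check_http_get_response(..., server_name=SWH_WEB_SERVER_NAME)` call simulates a production request so the `use_cache` branch runs. A hedged sketch of the production check the view relies on (`is_swh_web_production` lives in `swh.web.common.utils`; its exact implementation may differ from this):

```python
from django.http import HttpRequest

from swh.web.config import SWH_WEB_SERVER_NAME


def is_swh_web_production(request: HttpRequest) -> bool:
    # Assumption: production is detected by comparing the request's
    # server name against the canonical production host name.
    return request.get_host() == SWH_WEB_SERVER_NAME
```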