diff --git a/swh/web/misc/origin_save.py b/swh/web/misc/origin_save.py index babaf9a8..d7df8497 100644 --- a/swh/web/misc/origin_save.py +++ b/swh/web/misc/origin_save.py @@ -1,123 +1,124 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from django.core.paginator import Paginator +from django.db.models import Q from django.http import JsonResponse from django.shortcuts import render from rest_framework.decorators import api_view, authentication_classes from swh.web.api.throttling import throttle_scope from swh.web.common.exc import ForbiddenExc from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import ( create_save_origin_request, get_savable_visit_types, get_save_origin_requests_from_queryset, get_save_origin_task_info, ) from swh.web.common.utils import EnforceCSRFAuthentication def _origin_save_view(request): return render( request, "misc/origin-save.html", {"heading": ("Request the saving of a software origin into " "the archive")}, ) @api_view(["POST"]) @authentication_classes((EnforceCSRFAuthentication,)) @throttle_scope("swh_save_origin") def _origin_save_request(request, visit_type, origin_url): """ This view is called through AJAX from the save code now form of swh-web. We use DRF here as we want to rate limit the number of submitted requests per user to avoid being possibly flooded by bots. 
""" try: response = create_save_origin_request(visit_type, origin_url) return JsonResponse(response) except ForbiddenExc as exc: return JsonResponse({"detail": str(exc)}, status=403) except Exception as exc: return JsonResponse({"detail": str(exc)}, status=500) def _visit_save_types_list(request): visit_types = get_savable_visit_types() return JsonResponse(visit_types, safe=False) def _origin_save_requests_list(request, status): if status != "all": save_requests = SaveOriginRequest.objects.filter(status=status) else: save_requests = SaveOriginRequest.objects.all() table_data = {} table_data["recordsTotal"] = save_requests.count() table_data["draw"] = int(request.GET["draw"]) search_value = request.GET["search[value]"] column_order = request.GET["order[0][column]"] field_order = request.GET["columns[%s][name]" % column_order] order_dir = request.GET["order[0][dir]"] if order_dir == "desc": field_order = "-" + field_order save_requests = save_requests.order_by(field_order) length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 - save_requests = get_save_origin_requests_from_queryset(save_requests) + if search_value: - save_requests = [ - sr - for sr in save_requests - if search_value.lower() in sr["save_request_status"].lower() - or search_value.lower() in sr["save_task_status"].lower() - or search_value.lower() in sr["visit_type"].lower() - or search_value.lower() in sr["origin_url"].lower() - ] - - table_data["recordsFiltered"] = len(save_requests) + save_requests = save_requests.filter( + Q(status__icontains=search_value) + | Q(loading_task_status__icontains=search_value) + | Q(visit_type__icontains=search_value) + | Q(origin_url__icontains=search_value) + ) + + table_data["recordsFiltered"] = save_requests.count() paginator = Paginator(save_requests, length) - table_data["data"] = paginator.page(page).object_list + table_data["data"] = get_save_origin_requests_from_queryset( + paginator.page(page).object_list + ) return 
JsonResponse(table_data) def _save_origin_task_info(request, save_request_id): request_info = get_save_origin_task_info( save_request_id, full_info=request.user.is_staff ) for date_field in ("scheduled", "started", "ended"): if date_field in request_info and request_info[date_field] is not None: request_info[date_field] = request_info[date_field].isoformat() return JsonResponse(request_info) urlpatterns = [ url(r"^save/$", _origin_save_view, name="origin-save"), url( r"^save/(?P<visit_type>.+)/url/(?P<origin_url>.+)/$", _origin_save_request, name="origin-save-request", ), url(r"^save/types/list/$", _visit_save_types_list, name="origin-save-types-list"), url( r"^save/requests/list/(?P<status>.+)/$", _origin_save_requests_list, name="origin-save-requests-list", ), url( r"^save/task/info/(?P<save_request_id>.+)/", _save_origin_task_info, name="origin-save-task-info", ), ] diff --git a/swh/web/tests/misc/test_origin_save.py b/swh/web/tests/misc/test_origin_save.py index 40605677..c29b4ce8 100644 --- a/swh/web/tests/misc/test_origin_save.py +++ b/swh/web/tests/misc/test_origin_save.py @@ -1,105 +1,182 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from datetime import datetime +from datetime import datetime, timedelta, timezone +import json import pytest from django.test import Client +from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import ( SAVE_REQUEST_ACCEPTED, SAVE_TASK_NOT_YET_SCHEDULED, + SAVE_TASK_SUCCEEDED, ) from swh.web.common.utils import reverse from swh.web.settings.tests import save_origin_rate_post from swh.web.tests.utils import ( check_api_post_response, check_http_get_response, check_http_post_response, ) visit_type = "git" origin = {"url": "https://github.com/python/cpython"} @pytest.fixture def client(): return Client(enforce_csrf_checks=True) def 
test_save_request_form_csrf_token(client, mocker): mock_create_save_origin_request = mocker.patch( "swh.web.misc.origin_save.create_save_origin_request" ) _mock_create_save_origin_request(mock_create_save_origin_request) url = reverse( "origin-save-request", url_args={"visit_type": visit_type, "origin_url": origin["url"]}, ) check_http_post_response(client, url, status_code=403) data = _get_csrf_token(client, reverse("origin-save")) check_api_post_response(client, url, data=data, status_code=200) def test_save_request_form_rate_limit(client, mocker): mock_create_save_origin_request = mocker.patch( "swh.web.misc.origin_save.create_save_origin_request" ) _mock_create_save_origin_request(mock_create_save_origin_request) url = reverse( "origin-save-request", url_args={"visit_type": visit_type, "origin_url": origin["url"]}, ) data = _get_csrf_token(client, reverse("origin-save")) for _ in range(save_origin_rate_post): check_api_post_response(client, url, data=data, status_code=200) check_api_post_response(client, url, data=data, status_code=429) def test_save_request_form_server_error(client, mocker): mock_create_save_origin_request = mocker.patch( "swh.web.misc.origin_save.create_save_origin_request" ) mock_create_save_origin_request.side_effect = Exception("Server error") url = reverse( "origin-save-request", url_args={"visit_type": visit_type, "origin_url": origin["url"]}, ) data = _get_csrf_token(client, reverse("origin-save")) check_api_post_response(client, url, data=data, status_code=500) def test_old_save_url_redirection(client): url = reverse("browse-origin-save") redirect_url = reverse("origin-save") resp = check_http_get_response(client, url, status_code=302) assert resp["location"] == redirect_url +@pytest.mark.django_db +def test_save_origin_requests_list(client, mocker): + visit_types = ("git", "svn", "hg") + nb_origins_per_type = 10 + for visit_type in visit_types: + for i in range(nb_origins_per_type): + SaveOriginRequest.objects.create( + 
request_date=datetime.now(tz=timezone.utc), + visit_type=visit_type, + origin_url=f"https://{visit_type}.example.org/project{i}", + status=SAVE_REQUEST_ACCEPTED, + visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), + loading_task_id=i, + loading_task_status=SAVE_TASK_SUCCEEDED, + ) + + mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") + mock_scheduler.get_tasks.return_value = [] + mock_scheduler.get_task_runs.return_value = [] + + # retrieve all save requests in 3 pages, sorted in descending order + # of request creation + for i, visit_type in enumerate(reversed(visit_types)): + url = reverse( + "origin-save-requests-list", + url_args={"status": "all"}, + query_params={ + "draw": i + 1, + "search[value]": "", + "order[0][column]": "0", + "columns[0][name]": "request_date", + "order[0][dir]": "desc", + "length": nb_origins_per_type, + "start": i * nb_origins_per_type, + }, + ) + + resp = check_http_get_response( + client, url, status_code=200, content_type="application/json" + ) + sors = json.loads(resp.content.decode("utf-8")) + assert sors["draw"] == i + 1 + assert sors["recordsFiltered"] == len(visit_types) * nb_origins_per_type + assert sors["recordsTotal"] == len(visit_types) * nb_origins_per_type + assert len(sors["data"]) == nb_origins_per_type + assert all(d["visit_type"] == visit_type for d in sors["data"]) + + # retrieve save requests filtered by visit type in a single page + for i, visit_type in enumerate(reversed(visit_types)): + url = reverse( + "origin-save-requests-list", + url_args={"status": "all"}, + query_params={ + "draw": i + 1, + "search[value]": visit_type, + "order[0][column]": "0", + "columns[0][name]": "request_date", + "order[0][dir]": "desc", + "length": nb_origins_per_type, + "start": 0, + }, + ) + + resp = check_http_get_response( + client, url, status_code=200, content_type="application/json" + ) + sors = json.loads(resp.content.decode("utf-8")) + assert sors["draw"] == i + 1 + assert 
sors["recordsFiltered"] == nb_origins_per_type + assert sors["recordsTotal"] == len(visit_types) * nb_origins_per_type + assert len(sors["data"]) == nb_origins_per_type + assert all(d["visit_type"] == visit_type for d in sors["data"]) + + def _get_csrf_token(client, url): resp = client.get(url) return {"csrfmiddlewaretoken": resp.cookies["csrftoken"].value} def _mock_create_save_origin_request(mock): expected_data = { "visit_type": visit_type, "origin_url": origin["url"], "save_request_date": datetime.now().isoformat(), "save_request_status": SAVE_REQUEST_ACCEPTED, "save_task_status": SAVE_TASK_NOT_YET_SCHEDULED, "visit_date": None, } mock.return_value = expected_data