diff --git a/swh/web/api/throttling.py b/swh/web/api/throttling.py index ed449924..66afa8b3 100644 --- a/swh/web/api/throttling.py +++ b/swh/web/api/throttling.py @@ -1,202 +1,215 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from ipaddress import IPv4Network, IPv6Network, ip_address, ip_network from typing import Callable, List, TypeVar, Union import sentry_sdk from django.core.exceptions import ImproperlyConfigured import rest_framework from rest_framework.throttling import ScopedRateThrottle +from swh.web.auth.utils import API_SAVE_ORIGIN_PERMISSION from swh.web.config import get_config APIView = TypeVar("APIView", bound="rest_framework.views.APIView") Request = rest_framework.request.Request API_THROTTLING_EXEMPTED_PERM = "swh.web.api.throttling_exempted" class SwhWebRateThrottle(ScopedRateThrottle): """Custom DRF request rate limiter for anonymous users Requests are grouped into scopes. It enables to apply different requests rate limiting based on the scope name but also the input HTTP request types. To associate a scope to requests, one must add a 'throttle_scope' attribute when using a class based view, or call the 'throttle_scope' decorator when using a function based view. By default, requests do not have an associated scope and are not rate limited. Rate limiting can also be configured according to the type of the input HTTP requests for fine grained tuning. For instance, the following YAML configuration section sets a rate of: - 1 per minute for POST requests - 60 per minute for other request types for the 'swh_api' scope while exempting those coming from the 127.0.0.0/8 ip network. .. code-block:: yaml throttling: scopes: swh_api: limiter_rate: default: 60/m POST: 1/m exempted_networks: - 127.0.0.0/8 """ scope = None def __init__(self): super().__init__() self.exempted_networks = None self.num_requests = 0 self.duration = 0 def get_cache_key(self, request, view): # do not handle throttling if user is authenticated if request.user.is_authenticated: return None else: return super().get_cache_key(request, view) def get_exempted_networks( self, scope_name: str ) -> List[Union[IPv4Network, IPv6Network]]: if not self.exempted_networks: scopes = get_config()["throttling"]["scopes"] scope = scopes.get(scope_name) if scope: networks = scope.get("exempted_networks") if networks: self.exempted_networks = [ ip_network(network) for network in networks ] return self.exempted_networks + def get_scope(self, view: APIView): + if not self.scope: + # class based view case + return getattr(view, self.scope_attr, None) + else: + # function based view case + return self.scope + def allow_request(self, request: Request, view: APIView) -> bool: # class based view case if not self.scope: default_scope = getattr(view, self.scope_attr, None) request_allowed = None if default_scope is not None: # check if there is a specific rate limiting associated # to the request type assert request.method is not None request_scope = f"{default_scope}_{request.method.lower()}" setattr(view, self.scope_attr, request_scope) try: request_allowed = super().allow_request(request, view) # use default rate limiting otherwise except ImproperlyConfigured as exc: sentry_sdk.capture_exception(exc) setattr(view, self.scope_attr, default_scope) if request_allowed is None: request_allowed = super().allow_request(request, view) # function based view case else: default_scope = self.scope # check if there is a specific rate limiting associated # to the request type self.scope = default_scope + "_" + request.method.lower() try: self.rate = self.get_rate() # use default rate limiting otherwise except ImproperlyConfigured: self.scope = default_scope self.rate = self.get_rate() self.num_requests, self.duration = self.parse_rate(self.rate) request_allowed = super(ScopedRateThrottle, self).allow_request( request, view ) self.scope = default_scope exempted_networks = self.get_exempted_networks(default_scope) exempted_ip = False if exempted_networks: remote_address = ip_address(self.get_ident(request)) exempted_ip = any( remote_address in network for network in exempted_networks ) request_allowed = exempted_ip or request_allowed # set throttling related data in the request metadata # in order for the ThrottlingHeadersMiddleware to # add X-RateLimit-* headers in the HTTP response if not exempted_ip and hasattr(self, "history"): hit_count = len(self.history) request.META["RateLimit-Limit"] = self.num_requests request.META["RateLimit-Remaining"] = self.num_requests - hit_count wait = self.wait() if wait is not None: request.META["RateLimit-Reset"] = int(self.now + wait) return request_allowed class SwhWebUserRateThrottle(SwhWebRateThrottle): """Custom DRF request rate limiter for authenticated users It has the same behavior than :class:`swh.web.api.throttling.SwhWebRateThrottle` except the number of allowed requests for each throttle scope is increased by a 1Ox factor. """ NUM_REQUESTS_FACTOR = 10 def get_cache_key(self, request, view): # do not handle throttling if user is not authenticated if request.user.is_authenticated: return super(SwhWebRateThrottle, self).get_cache_key(request, view) else: return None def parse_rate(self, rate): # increase number of allowed requests num_requests, duration = super().parse_rate(rate) return (num_requests * self.NUM_REQUESTS_FACTOR, duration) def allow_request(self, request: Request, view: APIView) -> bool: - # no throttling for staff users or users with adequate permission if request.user.is_staff or request.user.has_perm(API_THROTTLING_EXEMPTED_PERM): + # no throttling for staff users or users with adequate permission + return True + scope = self.get_scope(view) + if scope == "save_origin" and request.user.has_perm(API_SAVE_ORIGIN_PERMISSION): + # no throttling on save origin endpoint for users with adequate permission return True return super().allow_request(request, view) def throttle_scope(scope: str) -> Callable[..., APIView]: """Decorator that allows the throttle scope of a DRF function based view to be set:: @api_view(['GET', ]) @throttle_scope('scope') def view(request): ... """ def decorator(func: APIView) -> APIView: SwhScopeRateThrottle = type( "SwhWebScopeRateThrottle", (SwhWebRateThrottle,), {"scope": scope} ) SwhScopeUserRateThrottle = type( "SwhWebScopeUserRateThrottle", (SwhWebUserRateThrottle,), {"scope": scope}, ) func.throttle_classes = (SwhScopeRateThrottle, SwhScopeUserRateThrottle) return func return decorator diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py index d9370bdc..19a18bac 100644 --- a/swh/web/api/views/origin_save.py +++ b/swh/web/api/views/origin_save.py @@ -1,113 +1,120 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route -from swh.web.auth.utils import privileged_user +from swh.web.auth.utils import ( + API_SAVE_ORIGIN_PERMISSION, + SWH_AMBASSADOR_PERMISSION, + privileged_user, +) from swh.web.common.origin_save import ( create_save_origin_request, get_savable_visit_types, get_save_origin_requests, ) def _savable_visit_types(): visit_types = sorted(get_savable_visit_types()) docstring = "" for visit_type in visit_types[:-1]: docstring += f"**{visit_type}**, " docstring += f"and **{visit_types[-1]}**" return docstring @api_route( r"/origin/save/(?P.+)/url/(?P.+)/", "api-1-save-origin", methods=["GET", "POST"], throttle_scope="swh_save_origin", never_cache=True, ) @api_doc("/origin/save/") @format_docstring(visit_types=_savable_visit_types()) def api_save_origin(request, visit_type, origin_url): """ .. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/ .. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/ Request the saving of a software origin into the archive or check the status of previously created save requests. That endpoint enables to create a saving task for a software origin through a POST request. Depending of the provided origin url, the save request can either be: * immediately **accepted**, for well known code hosting providers like for instance GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeeded**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin). :param string visit_type: the type of visit to perform (currently the supported types are {visit_types}) :param string origin_url: the url of the origin to save {common_headers} :>json string origin_url: the url of the origin to save :>json string visit_type: the type of visit to perform :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either **accepted**, **rejected** or **pending** :>json string save_task_status: the status of the origin saving task, either **not created**, **not yet scheduled**, **scheduled**, **succeeded** or **failed** :>json string visit_date: the date (in iso format) of the visit if a visit occurred, null otherwise. :>json string visit_status: the status of the visit, either **full**, **partial**, **not_found** or **failed** if a visit occurred, null otherwise. :>json string note: optional note giving details about the save request, for instance why it has been rejected :statuscode 200: no error :statuscode 400: an invalid visit type or origin url has been provided :statuscode 403: the provided origin url is blacklisted :statuscode 404: no save requests have been found for a given origin """ data = request.data or {} if request.method == "POST": sor = create_save_origin_request( visit_type, origin_url, - privileged_user(request), + privileged_user( + request, + permissions=[SWH_AMBASSADOR_PERMISSION, API_SAVE_ORIGIN_PERMISSION], + ), user_id=request.user.id, **data, ) del sor["id"] else: sor = get_save_origin_requests(visit_type, origin_url) for s in sor: del s["id"] return sor diff --git a/swh/web/auth/utils.py b/swh/web/auth/utils.py index 9539772d..0f13e78e 100644 --- a/swh/web/auth/utils.py +++ b/swh/web/auth/utils.py @@ -1,84 +1,95 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from base64 import urlsafe_b64encode +from typing import List from cryptography.fernet import Fernet from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import hashes from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +from django.http.request import HttpRequest + OIDC_SWH_WEB_CLIENT_ID = "swh-web" SWH_AMBASSADOR_PERMISSION = "swh.ambassador" +API_SAVE_ORIGIN_PERMISSION = "swh.web.api.save_origin" def _get_fernet(password: bytes, salt: bytes) -> Fernet: """ Instantiate a Fernet system from a password and a salt value (see https://cryptography.io/en/latest/fernet/). Args: password: user password that will be used to generate a Fernet key derivation function salt: value that will be used to generate a Fernet key derivation function Returns: The Fernet system """ kdf = PBKDF2HMAC( algorithm=hashes.SHA256(), length=32, salt=salt, iterations=100000, backend=default_backend(), ) key = urlsafe_b64encode(kdf.derive(password)) return Fernet(key) def encrypt_data(data: bytes, password: bytes, salt: bytes) -> bytes: """ Encrypt data using Fernet system (symmetric encryption). Args: data: input data to encrypt password: user password that will be used to generate a Fernet key derivation function salt: value that will be used to generate a Fernet key derivation function Returns: The encrypted data """ return _get_fernet(password, salt).encrypt(data) def decrypt_data(data: bytes, password: bytes, salt: bytes) -> bytes: """ Decrypt data using Fernet system (symmetric encryption). Args: data: input data to decrypt password: user password that will be used to generate a Fernet key derivation function salt: value that will be used to generate a Fernet key derivation function Returns: The decrypted data """ return _get_fernet(password, salt).decrypt(data) -def privileged_user(request) -> bool: +def privileged_user(request: HttpRequest, permissions: List[str] = []) -> bool: """Determine whether a user is authenticated and is a privileged one (e.g ambassador). This allows such user to have access to some more actions (e.g. bypass save code now - review, access to 'archives' type...) + review, access to 'archives' type...). + A user is considered as privileged if he is a staff member or has any permission + from those provided as parameters. + Args: + request: Input django HTTP request + permissions: list of permission names to determine if user is privileged or not + Returns: + Whether the user is privileged or not. """ user = request.user return user.is_authenticated and ( - user.is_staff or user.has_perm(SWH_AMBASSADOR_PERMISSION) + user.is_staff or any([user.has_perm(perm) for perm in permissions]) ) diff --git a/swh/web/misc/origin_save.py b/swh/web/misc/origin_save.py index e15261b8..db9f05f2 100644 --- a/swh/web/misc/origin_save.py +++ b/swh/web/misc/origin_save.py @@ -1,97 +1,99 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from django.core.paginator import Paginator from django.db.models import Q from django.http import JsonResponse from django.shortcuts import render -from swh.web.auth.utils import privileged_user +from swh.web.auth.utils import SWH_AMBASSADOR_PERMISSION, privileged_user from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import ( get_savable_visit_types, get_save_origin_task_info, ) def _origin_save_view(request): return render( request, "misc/origin-save.html", { "heading": ("Request the saving of a software origin into the archive"), - "visit_types": get_savable_visit_types(privileged_user(request)), + "visit_types": get_savable_visit_types( + privileged_user(request, permissions=[SWH_AMBASSADOR_PERMISSION]) + ), }, ) def _origin_save_requests_list(request, status): if status != "all": save_requests = SaveOriginRequest.objects.filter(status=status) else: save_requests = SaveOriginRequest.objects.all() table_data = {} table_data["recordsTotal"] = save_requests.count() table_data["draw"] = int(request.GET["draw"]) search_value = request.GET["search[value]"] column_order = request.GET["order[0][column]"] field_order = request.GET["columns[%s][name]" % column_order] order_dir = request.GET["order[0][dir]"] if order_dir == "desc": field_order = "-" + field_order save_requests = save_requests.order_by(field_order) length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 if search_value: save_requests = save_requests.filter( Q(status__icontains=search_value) | Q(loading_task_status__icontains=search_value) | Q(visit_type__icontains=search_value) | Q(origin_url__icontains=search_value) ) if ( int(request.GET.get("user_requests_only", "0")) and request.user.is_authenticated ): save_requests = save_requests.filter(user_ids__contains=f'"{request.user.id}"') table_data["recordsFiltered"] = save_requests.count() paginator = Paginator(save_requests, length) table_data["data"] = [sor.to_dict() for sor in paginator.page(page).object_list] return JsonResponse(table_data) def _save_origin_task_info(request, save_request_id): request_info = get_save_origin_task_info( save_request_id, full_info=request.user.is_staff ) for date_field in ("scheduled", "started", "ended"): if date_field in request_info and request_info[date_field] is not None: request_info[date_field] = request_info[date_field].isoformat() return JsonResponse(request_info) urlpatterns = [ url(r"^save/$", _origin_save_view, name="origin-save"), url( r"^save/requests/list/(?P.+)/$", _origin_save_requests_list, name="origin-save-requests-list", ), url( r"^save/task/info/(?P.+)/", _save_origin_task_info, name="origin-save-task-info", ), ] diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py index 6dcc8f7d..9cdfe6e7 100644 --- a/swh/web/tests/api/views/test_origin_save.py +++ b/swh/web/tests/api/views/test_origin_save.py @@ -1,541 +1,578 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta import uuid import pytest from django.core.exceptions import ObjectDoesNotExist from django.utils import timezone -from swh.web.auth.utils import SWH_AMBASSADOR_PERMISSION +from swh.web.auth.utils import API_SAVE_ORIGIN_PERMISSION, SWH_AMBASSADOR_PERMISSION from swh.web.common.models import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_FAILED, SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, VISIT_STATUS_FAILED, VISIT_STATUS_FULL, SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) from swh.web.common.typing import OriginExistenceCheckInfo from swh.web.common.utils import reverse from swh.web.settings.tests import save_origin_rate_post from swh.web.tests.utils import ( check_api_get_responses, check_api_post_response, check_api_post_responses, + create_django_permission, ) pytestmark = pytest.mark.django_db @pytest.fixture(autouse=True) def populated_db(): SaveAuthorizedOrigin.objects.create(url="https://github.com/"), SaveAuthorizedOrigin.objects.create(url="https://gitlab.com/"), SaveUnauthorizedOrigin.objects.create(url="https://github.com/user/illegal_repo") SaveUnauthorizedOrigin.objects.create(url="https://gitlab.com/user_to_exclude") def test_invalid_visit_type(api_client, swh_scheduler): url = reverse( "api-1-save-origin", url_args={ "visit_type": "foo", "origin_url": "https://github.com/torvalds/linux", }, ) check_api_get_responses(api_client, url, status_code=400) def test_invalid_origin_url(api_client, swh_scheduler): url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": "bar"} ) check_api_get_responses(api_client, url, status_code=400) def check_created_save_request_status( api_client, mocker, origin_url, expected_request_status, expected_task_status=None, visit_date=None, ): mock_origin_exists = mocker.patch("swh.web.common.origin_save.origin_exists") mock_origin_exists.return_value = OriginExistenceCheckInfo( origin_url=origin_url, exists=True, last_modified=None, content_length=None ) url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} ) mock_visit_date = mocker.patch( ("swh.web.common.origin_save._get_visit_info_for_save_request") ) mock_visit_date.return_value = (visit_date, None) if expected_request_status != SAVE_REQUEST_REJECTED: response = check_api_post_responses(api_client, url, data=None, status_code=200) assert response.data["save_request_status"] == expected_request_status assert response.data["save_task_status"] == expected_task_status else: check_api_post_responses(api_client, url, data=None, status_code=403) def check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status, expected_task_status, scheduler_task_status="next_run_not_scheduled", scheduler_task_run_status=None, visit_date=None, visit_status=None, ): if expected_task_status != SAVE_TASK_NOT_CREATED: task = dict(swh_scheduler.search_tasks()[0].items()) backend_id = str(uuid.uuid4()) if scheduler_task_status != "next_run_not_scheduled": swh_scheduler.schedule_task_run(task["id"], backend_id) if scheduler_task_run_status is not None: swh_scheduler.start_task_run(backend_id) task_run = dict( swh_scheduler.end_task_run(backend_id, scheduler_task_run_status).items() ) url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} ) mock_visit_date = mocker.patch( ("swh.web.common.origin_save._get_visit_info_for_save_request") ) mock_visit_date.return_value = (visit_date, visit_status) response = check_api_get_responses(api_client, url, status_code=200) save_request_data = response.data[0] assert save_request_data["save_request_status"] == expected_request_status assert save_request_data["save_task_status"] == expected_task_status assert save_request_data["visit_status"] == visit_status if scheduler_task_run_status is not None: # Check that save task status is still available when # the scheduler task has been archived swh_scheduler.delete_archived_tasks( [{"task_id": task["id"], "task_run_id": task_run["id"]}] ) response = check_api_get_responses(api_client, url, status_code=200) save_request_data = response.data[0] assert save_request_data["save_task_status"] == expected_task_status assert save_request_data["visit_status"] == visit_status def test_save_request_rejected(api_client, mocker, swh_scheduler): origin_url = "https://github.com/user/illegal_repo" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_REJECTED, ) check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_REJECTED, expected_task_status=SAVE_TASK_NOT_CREATED, ) def test_save_request_pending(api_client, mocker, swh_scheduler): origin_url = "https://unkwownforge.com/user/repo" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_PENDING, expected_task_status=SAVE_TASK_NOT_CREATED, ) check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_PENDING, expected_task_status=SAVE_TASK_NOT_CREATED, ) def test_save_request_scheduled(api_client, mocker, swh_scheduler): origin_url = "https://github.com/Kitware/CMake" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SCHEDULED, scheduler_task_status="next_run_scheduled", scheduler_task_run_status="scheduled", ) def test_save_request_completed(api_client, mocker, swh_scheduler): origin_url = "https://github.com/Kitware/CMake" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SUCCEEDED, scheduler_task_status="completed", scheduler_task_run_status="eventful", visit_date=None, ) def test_save_request_completed_visit_status(api_client, mocker, swh_scheduler): origin_url = "https://github.com/Kitware/CMake" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1) check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SUCCEEDED, scheduler_task_status="completed", scheduler_task_run_status="eventful", visit_date=visit_date, visit_status=VISIT_STATUS_FULL, ) def test_save_request_failed(api_client, mocker, swh_scheduler): origin_url = "https://gitlab.com/inkscape/inkscape" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_FAILED, scheduler_task_status="disabled", scheduler_task_run_status="failed", visit_status=VISIT_STATUS_FAILED, ) def test_create_save_request_no_duplicate(api_client, mocker, swh_scheduler): origin_url = "https://github.com/webpack/webpack" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) sors = list( SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) ) assert len(sors) == 1 check_save_request_status( api_client, mocker, swh_scheduler, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SCHEDULED, scheduler_task_status="next_run_scheduled", scheduler_task_run_status="scheduled", ) check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SCHEDULED, ) sors = list( SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) ) assert len(sors) == 1 def test_get_save_requests_unknown_origin(api_client, swh_scheduler): unknown_origin_url = "https://gitlab.com/foo/bar" url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": unknown_origin_url}, ) response = check_api_get_responses(api_client, url, status_code=404) assert response.data == { "exception": "NotFoundExc", "reason": ( "No save requests found for visit of type git on origin with url %s." ) % unknown_origin_url, } _visit_type = "git" _origin_url = "https://github.com/python/cpython" -def test_save_requests_rate_limit(api_client, mocker): - create_save_origin_request = mocker.patch( - "swh.web.api.views.origin_save.create_save_origin_request" +def test_save_requests_rate_limit(api_client, swh_scheduler): + + url = reverse( + "api-1-save-origin", + url_args={"visit_type": _visit_type, "origin_url": _origin_url}, + ) + + for _ in range(save_origin_rate_post): + check_api_post_response(api_client, url, status_code=200) + + check_api_post_response(api_client, url, status_code=429) + + +def test_save_requests_no_rate_limit_if_permission( + api_client, regular_user, swh_scheduler +): + + regular_user.user_permissions.add( + create_django_permission(API_SAVE_ORIGIN_PERMISSION) ) - def _save_request_dict(*args, **kwargs): - return { - "id": 1, - "visit_type": _visit_type, - "origin_url": _origin_url, - "save_request_date": datetime.now().isoformat(), - "save_request_status": SAVE_REQUEST_ACCEPTED, - "save_task_status": SAVE_TASK_NOT_YET_SCHEDULED, - "visit_date": None, - "visit_status": None, - } + assert regular_user.has_perm(API_SAVE_ORIGIN_PERMISSION) - create_save_origin_request.side_effect = _save_request_dict + api_client.force_login(regular_user) url = reverse( "api-1-save-origin", url_args={"visit_type": _visit_type, "origin_url": _origin_url}, ) for _ in range(save_origin_rate_post): check_api_post_response(api_client, url, status_code=200) - check_api_post_response(api_client, url, status_code=429) + check_api_post_response(api_client, url, status_code=200) + + +def test_save_request_unknown_repo_with_permission( + api_client, regular_user, mocker, swh_scheduler +): + + regular_user.user_permissions.add( + create_django_permission(API_SAVE_ORIGIN_PERMISSION) + ) + + assert regular_user.has_perm(API_SAVE_ORIGIN_PERMISSION) + + api_client.force_login(regular_user) + + origin_url = "https://unkwownforge.org/user/repo" + check_created_save_request_status( + api_client, + mocker, + origin_url, + expected_request_status=SAVE_REQUEST_ACCEPTED, + expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, + ) + check_save_request_status( + api_client, + mocker, + swh_scheduler, + origin_url, + expected_request_status=SAVE_REQUEST_ACCEPTED, + expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, + ) def test_save_request_form_server_error(api_client, mocker): create_save_origin_request = mocker.patch( "swh.web.api.views.origin_save.create_save_origin_request" ) create_save_origin_request.side_effect = Exception("Server error") url = reverse( "api-1-save-origin", url_args={"visit_type": _visit_type, "origin_url": _origin_url}, ) check_api_post_responses(api_client, url, status_code=500) @pytest.fixture def origin_to_review(): return "https://git.example.org/user/project" def test_create_save_request_pending_review_anonymous_user( api_client, origin_to_review, swh_scheduler ): url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_to_review}, ) response = check_api_post_responses(api_client, url, status_code=200) assert response.data["save_request_status"] == SAVE_REQUEST_PENDING with pytest.raises(ObjectDoesNotExist): SaveAuthorizedOrigin.objects.get(url=origin_to_review) def test_create_save_request_archives_with_ambassador_user( api_client, keycloak_oidc, requests_mock, swh_scheduler, ): swh_scheduler.add_load_archive_task_type() keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION] oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") originUrl = "https://somewhere.org/simple" artifact_version = "1.2.3" artifact_filename = f"tarball-{artifact_version}.tar.gz" artifact_url = f"{originUrl}/{artifact_filename}" content_length = "100" last_modified = "Sun, 21 Aug 2011 16:26:32 GMT" requests_mock.head( artifact_url, status_code=200, headers={"content-length": content_length, "last-modified": last_modified,}, ) url = reverse( "api-1-save-origin", url_args={"visit_type": "archives", "origin_url": originUrl,}, ) response = check_api_post_response( api_client, url, status_code=200, data={ "archives_data": [ {"artifact_url": artifact_url, "artifact_version": artifact_version,} ] }, ) assert response.data["save_request_status"] == SAVE_REQUEST_ACCEPTED assert SaveAuthorizedOrigin.objects.get(url=originUrl) def test_create_save_request_archives_missing_artifacts_data( api_client, keycloak_oidc, swh_scheduler ): swh_scheduler.add_load_archive_task_type() keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION] oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") originUrl = "https://somewhere.org/simple" url = reverse( "api-1-save-origin", url_args={"visit_type": "archives", "origin_url": originUrl,}, ) response = check_api_post_response(api_client, url, status_code=400, data={},) assert "Artifacts data are missing" in response.data["reason"] response = check_api_post_response( api_client, url, status_code=400, data={"archives_data": [{"artifact_url": "", "arttifact_version": "1.0"}]}, ) assert "Missing url or version for an artifact to load" in response.data["reason"] def test_create_save_request_archives_accepted_ambassador_user( api_client, origin_to_review, keycloak_oidc, mocker, swh_scheduler ): keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION] oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") check_created_save_request_status( api_client, mocker, origin_to_review, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) assert SaveAuthorizedOrigin.objects.get(url=origin_to_review) def test_create_save_request_anonymous_user_no_user_id(api_client, swh_scheduler): origin_url = "https://some.git.hosters/user/repo" url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}, ) check_api_post_responses(api_client, url, status_code=200) sor = SaveOriginRequest.objects.get(origin_url=origin_url) assert sor.user_ids is None def test_create_save_request_authenticated_user_id( api_client, keycloak_oidc, swh_scheduler ): oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") origin_url = "https://some.git.hosters/user/repo2" url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}, ) response = check_api_post_response(api_client, url, status_code=200) assert response.wsgi_request.user.id is not None user_id = str(response.wsgi_request.user.id) sor = SaveOriginRequest.objects.get(user_ids=f'"{user_id}"') assert sor.user_ids == f'"{user_id}"' def test_create_pending_save_request_multiple_authenticated_users( api_client, swh_scheduler, regular_user, regular_user2 ): origin_url = "https://some.git.hosters/user/repo3" url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}, ) api_client.force_login(regular_user) check_api_post_response(api_client, url, status_code=200) api_client.force_login(regular_user2) check_api_post_response(api_client, url, status_code=200) assert SaveOriginRequest.objects.get(user_ids__contains=f'"{regular_user.id}"') assert SaveOriginRequest.objects.get(user_ids__contains=f'"{regular_user2.id}"')