diff --git a/swh/web/api/views/graph.py b/swh/web/api/views/graph.py index 053b94ec..ac8427cf 100644 --- a/swh/web/api/views/graph.py +++ b/swh/web/api/views/graph.py @@ -1,181 +1,193 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from distutils.util import strtobool import json from typing import Dict, Iterator, Union from urllib.parse import unquote, urlparse, urlunparse import requests from django.http import QueryDict from django.http.response import StreamingHttpResponse from rest_framework.decorators import renderer_classes from rest_framework.renderers import JSONRenderer from rest_framework.request import Request from rest_framework.response import Response from swh.model.hashutil import hash_to_hex from swh.model.model import Sha1Git from swh.model.swhids import ExtendedObjectType, ExtendedSWHID from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.api.renderers import PlainTextRenderer from swh.web.common import archive from swh.web.config import SWH_WEB_INTERNAL_SERVER_NAME, get_config API_GRAPH_PERM = "swh.web.api.graph" def _resolve_origin_swhid(swhid: str, origin_urls: Dict[Sha1Git, str]) -> str: """ Resolve origin url from its swhid sha1 representation. """ parsed_swhid = ExtendedSWHID.from_string(swhid) if parsed_swhid.object_type == ExtendedObjectType.ORIGIN: if parsed_swhid.object_id in origin_urls: return origin_urls[parsed_swhid.object_id] else: origin_info = list( archive.lookup_origins_by_sha1s([hash_to_hex(parsed_swhid.object_id)]) )[0] assert origin_info is not None origin_urls[parsed_swhid.object_id] = origin_info["url"] return origin_info["url"] else: return swhid def _resolve_origin_swhids_in_graph_response( response: requests.Response, ) -> Iterator[bytes]: """ Resolve origin urls from their swhid sha1 representations in graph service responses. """ content_type = response.headers["Content-Type"] origin_urls: Dict[Sha1Git, str] = {} if content_type == "application/x-ndjson": for line in response.iter_lines(): swhids = json.loads(line.decode("utf-8")) processed_line = [] for swhid in swhids: processed_line.append(_resolve_origin_swhid(swhid, origin_urls)) yield (json.dumps(processed_line) + "\n").encode() elif content_type == "text/plain": for line in response.iter_lines(): processed_line = [] swhids = line.decode("utf-8").split(" ") for swhid in swhids: processed_line.append(_resolve_origin_swhid(swhid, origin_urls)) yield (" ".join(processed_line) + "\n").encode() else: for line in response.iter_lines(): yield line + b"\n" @api_route(r"/graph/", "api-1-graph-doc") @api_doc("/graph/") def api_graph(request: Request) -> None: """ .. http:get:: /api/1/graph/(graph_query)/ Provide fast access to the graph representation of the Software Heritage archive. That endpoint acts as a proxy for the `Software Heritage Graph service `_. It provides fast access to the `graph representation `_ of the Software Heritage archive. For more details please refer to the `Graph RPC API documentation `_. .. warning:: That endpoint is not publicly available and requires authentication and special user permission in order to be able to request it. :param string graph_query: query to forward to the Software Heritage Graph archive (see its `documentation `_) :query boolean resolve_origins: extra parameter defined by that proxy enabling to resolve origin urls from their sha1 representations :statuscode 200: no error :statuscode 400: an invalid graph query has been provided :statuscode 404: provided graph node cannot be found **Examples:** .. parsed-literal:: :swh_web_api:`graph/leaves/swh:1:dir:432d1b21c1256f7408a07c577b6974bbdbcc1323/` :swh_web_api:`graph/neighbors/swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35/` :swh_web_api:`graph/randomwalk/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2/ori?direction=backward` :swh_web_api:`graph/randomwalk/swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2/ori?direction=backward&limit=-2` :swh_web_api:`graph/visit/nodes/swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc?direction=backward&resolve_origins=true` :swh_web_api:`graph/visit/edges/swh:1:snp:40f9f177b8ab0b7b3d70ee14bbc8b214e2b2dcfc?direction=backward&resolve_origins=true` :swh_web_api:`graph/visit/paths/swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb?direction=backward&resolve_origins=true` """ return None @api_route(r"/graph/(?P.+)/", "api-1-graph") @renderer_classes([JSONRenderer, PlainTextRenderer]) def api_graph_proxy( request: Request, graph_query: str ) -> Union[Response, StreamingHttpResponse]: if request.get_host() != SWH_WEB_INTERNAL_SERVER_NAME: if not bool(request.user and request.user.is_authenticated): return Response("Authentication credentials were not provided.", status=401) if not request.user.has_perm(API_GRAPH_PERM): return Response( "You do not have permission to perform this action.", status=403 ) - graph_query_url = get_config()["graph"]["server_url"] + graph_config = get_config()["graph"] graph_query = unquote(graph_query) + graph_query_url = graph_config["server_url"] graph_query_url += graph_query parsed_url = urlparse(graph_query_url) query_dict = QueryDict(parsed_url.query, mutable=True) query_dict.update(request.GET) + # clamp max_edges query parameter according to authentication + if request.user.is_staff: + max_edges = graph_config["max_edges"]["staff"] + elif request.user.is_authenticated: + max_edges = graph_config["max_edges"]["user"] + else: + max_edges = graph_config["max_edges"]["anonymous"] + query_dict["max_edges"] = min( + max_edges, int(query_dict.get("max_edges", max_edges + 1)) + ) + if query_dict: graph_query_url = urlunparse( parsed_url._replace(query=query_dict.urlencode(safe="/;:")) ) response = requests.get(graph_query_url, stream=True) if response.status_code != 200: return Response( response.content, status=response.status_code, content_type=response.headers["Content-Type"], ) # graph stats and counter endpoint responses are not streamed if response.headers.get("Transfer-Encoding") != "chunked": return Response( response.json(), status=response.status_code, content_type=response.headers["Content-Type"], ) # other endpoint responses are streamed else: resolve_origins = strtobool(request.GET.get("resolve_origins", "false")) if response.status_code == 200 and resolve_origins: response_stream = _resolve_origin_swhids_in_graph_response(response) else: response_stream = map(lambda line: line + b"\n", response.iter_lines()) return StreamingHttpResponse( response_stream, status=response.status_code, content_type=response.headers["Content-Type"], ) diff --git a/swh/web/config.py b/swh/web/config.py index 88f9cfe6..8b62d009 100644 --- a/swh/web/config.py +++ b/swh/web/config.py @@ -1,205 +1,208 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict from swh.core import config from swh.counters import get_counters from swh.indexer.storage import get_indexer_storage from swh.scheduler import get_scheduler from swh.search import get_search from swh.storage import get_storage from swh.vault import get_vault from swh.web import settings SWH_WEB_INTERNAL_SERVER_NAME = "archive.internal.softwareheritage.org" STAGING_SERVER_NAMES = [ "webapp.staging.swh.network", "webapp.internal.staging.swh.network", ] SETTINGS_DIR = os.path.dirname(settings.__file__) DEFAULT_CONFIG = { "allowed_hosts": ("list", []), "storage": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5002/", "timeout": 10,}, ), "indexer_storage": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5007/", "timeout": 1,}, ), "counters": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5011/", "timeout": 1,}, ), "search": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5010/", "timeout": 10,}, ), "search_config": ( "dict", {"metadata_backend": "swh-indexer-storage",}, # or "swh-search" ), "log_dir": ("string", "/tmp/swh/log"), "debug": ("bool", False), "serve_assets": ("bool", False), "host": ("string", "127.0.0.1"), "port": ("int", 5004), "secret_key": ("string", "development key"), # do not display code highlighting for content > 1MB "content_display_max_size": ("int", 5 * 1024 * 1024), "snapshot_content_max_size": ("int", 1000), "throttling": ( "dict", { "cache_uri": None, # production: memcached as cache (127.0.0.1:11211) # development: in-memory cache so None "scopes": { "swh_api": { "limiter_rate": {"default": "120/h"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_api_origin_search": { "limiter_rate": {"default": "10/m"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_vault_cooking": { "limiter_rate": {"default": "120/h", "GET": "60/m"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_save_origin": { "limiter_rate": {"default": "120/h", "POST": "10/h"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_api_origin_visit_latest": { "limiter_rate": {"default": "700/m"}, "exempted_networks": ["127.0.0.0/8"], }, }, }, ), "vault": ("dict", {"cls": "remote", "args": {"url": "http://127.0.0.1:5005/",}}), "scheduler": ("dict", {"cls": "remote", "url": "http://127.0.0.1:5008/"}), "development_db": ("string", os.path.join(SETTINGS_DIR, "db.sqlite3")), "test_db": ("dict", {"name": "swh-web-test"}), "production_db": ("dict", {"name": "swh-web"}), "deposit": ( "dict", { "private_api_url": "https://deposit.softwareheritage.org/1/private/", "private_api_user": "swhworker", "private_api_password": "some-password", }, ), "e2e_tests_mode": ("bool", False), "es_workers_index_url": ("string", ""), "history_counters_url": ( "string", ( "http://counters1.internal.softwareheritage.org:5011" "/counters_history/history.json" ), ), "client_config": ("dict", {}), "keycloak": ("dict", {"server_url": "", "realm_name": ""}), "graph": ( "dict", - {"server_url": "http://graph.internal.softwareheritage.org:5009/graph/"}, + { + "server_url": "http://graph.internal.softwareheritage.org:5009/graph/", + "max_edges": {"staff": 0, "user": 100000, "anonymous": 1000}, + }, ), "status": ( "dict", { "server_url": "https://status.softwareheritage.org/", "json_path": "1.0/status/578e5eddcdc0cc7951000520", }, ), "counters_backend": ("string", "swh-storage"), # or "swh-counters" "staging_server_names": ("list", STAGING_SERVER_NAMES), "instance_name": ("str", "archive-test.softwareheritage.org"), "give": ("dict", {"public_key": "", "token": ""}), } swhweb_config: Dict[str, Any] = {} def get_config(config_file="web/web"): """Read the configuration file `config_file`. If an environment variable SWH_CONFIG_FILENAME is defined, this takes precedence over the config_file parameter. In any case, update the app with parameters (secret_key, conf) and return the parsed configuration as a dict. If no configuration file is provided, return a default configuration. """ if not swhweb_config: config_filename = os.environ.get("SWH_CONFIG_FILENAME") if config_filename: config_file = config_filename cfg = config.load_named_config(config_file, DEFAULT_CONFIG) swhweb_config.update(cfg) config.prepare_folders(swhweb_config, "log_dir") if swhweb_config.get("search"): swhweb_config["search"] = get_search(**swhweb_config["search"]) else: swhweb_config["search"] = None swhweb_config["storage"] = get_storage(**swhweb_config["storage"]) swhweb_config["vault"] = get_vault(**swhweb_config["vault"]) swhweb_config["indexer_storage"] = get_indexer_storage( **swhweb_config["indexer_storage"] ) swhweb_config["scheduler"] = get_scheduler(**swhweb_config["scheduler"]) swhweb_config["counters"] = get_counters(**swhweb_config["counters"]) return swhweb_config def search(): """Return the current application's search. """ return get_config()["search"] def storage(): """Return the current application's storage. """ return get_config()["storage"] def vault(): """Return the current application's vault. """ return get_config()["vault"] def indexer_storage(): """Return the current application's indexer storage. """ return get_config()["indexer_storage"] def scheduler(): """Return the current application's scheduler. """ return get_config()["scheduler"] def counters(): """Return the current application's counters. """ return get_config()["counters"] diff --git a/swh/web/tests/api/views/test_graph.py b/swh/web/tests/api/views/test_graph.py index dc459659..a8160178 100644 --- a/swh/web/tests/api/views/test_graph.py +++ b/swh/web/tests/api/views/test_graph.py @@ -1,337 +1,418 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import re import textwrap from urllib.parse import unquote, urlparse import pytest from django.http.response import StreamingHttpResponse from swh.model.hashutil import hash_to_bytes from swh.model.swhids import ExtendedObjectType, ExtendedSWHID from swh.web.api.views.graph import API_GRAPH_PERM from swh.web.common.utils import reverse from swh.web.config import SWH_WEB_INTERNAL_SERVER_NAME, get_config from swh.web.tests.utils import check_http_get_response def test_graph_endpoint_no_authentication_for_vpn_users(api_client, requests_mock): graph_query = "stats" url = reverse("api-1-graph", url_args={"graph_query": graph_query}) requests_mock.get( get_config()["graph"]["server_url"] + graph_query, json={}, headers={"Content-Type": "application/json"}, ) check_http_get_response( api_client, url, status_code=200, server_name=SWH_WEB_INTERNAL_SERVER_NAME ) def test_graph_endpoint_needs_authentication(api_client): url = reverse("api-1-graph", url_args={"graph_query": "stats"}) check_http_get_response(api_client, url, status_code=401) -def _authenticate_graph_user(api_client, keycloak_oidc): +def _authenticate_graph_user(api_client, keycloak_oidc, is_staff=False): keycloak_oidc.client_permissions = [API_GRAPH_PERM] + if is_staff: + keycloak_oidc.user_groups = ["/staff"] oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") def test_graph_endpoint_needs_permission(api_client, keycloak_oidc, requests_mock): graph_query = "stats" url = reverse("api-1-graph", url_args={"graph_query": graph_query}) oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") check_http_get_response(api_client, url, status_code=403) _authenticate_graph_user(api_client, keycloak_oidc) requests_mock.get( get_config()["graph"]["server_url"] + graph_query, json={}, headers={"Content-Type": "application/json"}, ) check_http_get_response(api_client, url, status_code=200) def test_graph_text_plain_response(api_client, keycloak_oidc, requests_mock): _authenticate_graph_user(api_client, keycloak_oidc) graph_query = "leaves/swh:1:dir:432d1b21c1256f7408a07c577b6974bbdbcc1323" response_text = textwrap.dedent( """\ swh:1:cnt:1d3dace0a825b0535c37c53ed669ef817e9c1b47 swh:1:cnt:6d5b280f4e33589ae967a7912a587dd5cb8dedaa swh:1:cnt:91bef238bf01356a550d416d14bb464c576ac6f4 swh:1:cnt:58a8b925a463b87d49639fda282b8f836546e396 swh:1:cnt:fd32ee0a87e16ccc853dfbeb7018674f9ce008c0 swh:1:cnt:ab7c39871872589a4fc9e249ebc927fb1042c90d swh:1:cnt:93073c02bf3869845977527de16af4d54765838d swh:1:cnt:4251f795b52c54c447a97c9fe904d8b1f993b1e0 swh:1:cnt:c6e7055424332006d07876ffeba684e7e284b383 swh:1:cnt:8459d8867dc3b15ef7ae9683e21cccc9ab2ec887 swh:1:cnt:5f9981d52202815aa947f85b9dfa191b66f51138 swh:1:cnt:00a685ec51bcdf398c15d588ecdedb611dbbab4b swh:1:cnt:e1cf1ea335106a0197a2f92f7804046425a7d3eb swh:1:cnt:07069b38087f88ec192d2c9aff75a502476fd17d swh:1:cnt:f045ee845c7f14d903a2c035b2691a7c400c01f0 """ ) requests_mock.get( get_config()["graph"]["server_url"] + graph_query, text=response_text, headers={"Content-Type": "text/plain", "Transfer-Encoding": "chunked"}, ) url = reverse("api-1-graph", url_args={"graph_query": graph_query}) resp = check_http_get_response( api_client, url, status_code=200, content_type="text/plain" ) assert isinstance(resp, StreamingHttpResponse) assert b"".join(resp.streaming_content) == response_text.encode() _response_json = { "counts": {"nodes": 17075708289, "edges": 196236587976}, "ratios": { "compression": 0.16, "bits_per_node": 58.828, "bits_per_edge": 5.119, "avg_locality": 2184278529.729, }, "indegree": {"min": 0, "max": 263180117, "avg": 11.4921492364925}, "outdegree": {"min": 0, "max": 1033207, "avg": 11.4921492364925}, } def test_graph_json_response(api_client, keycloak_oidc, requests_mock): _authenticate_graph_user(api_client, keycloak_oidc) graph_query = "stats" requests_mock.get( get_config()["graph"]["server_url"] + graph_query, json=_response_json, headers={"Content-Type": "application/json"}, ) url = reverse("api-1-graph", url_args={"graph_query": graph_query}) resp = check_http_get_response(api_client, url, status_code=200) assert resp.content_type == "application/json" assert resp.data == _response_json def test_graph_ndjson_response(api_client, keycloak_oidc, requests_mock): _authenticate_graph_user(api_client, keycloak_oidc) graph_query = "visit/paths/swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb" response_ndjson = textwrap.dedent( """\ ["swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb",\ "swh:1:cnt:acfb7cabd63b368a03a9df87670ece1488c8bce0"] ["swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb",\ "swh:1:cnt:2a0837708151d76edf28fdbb90dc3eabc676cff3"] ["swh:1:dir:644dd466d8ad527ea3a609bfd588a3244e6dafcb",\ "swh:1:cnt:eaf025ad54b94b2fdda26af75594cfae3491ec75"] """ ) requests_mock.get( get_config()["graph"]["server_url"] + graph_query, text=response_ndjson, headers={ "Content-Type": "application/x-ndjson", "Transfer-Encoding": "chunked", }, ) url = reverse("api-1-graph", url_args={"graph_query": graph_query}) resp = check_http_get_response(api_client, url, status_code=200) assert isinstance(resp, StreamingHttpResponse) assert resp["Content-Type"] == "application/x-ndjson" assert b"".join(resp.streaming_content) == response_ndjson.encode() def test_graph_response_resolve_origins( archive_data, api_client, keycloak_oidc, requests_mock, origin ): hasher = hashlib.sha1() hasher.update(origin["url"].encode()) origin_sha1 = hasher.digest() origin_swhid = str( ExtendedSWHID(object_type=ExtendedObjectType.ORIGIN, object_id=origin_sha1) ) snapshot = archive_data.snapshot_get_latest(origin["url"])["id"] snapshot_swhid = str( ExtendedSWHID( object_type=ExtendedObjectType.SNAPSHOT, object_id=hash_to_bytes(snapshot) ) ) _authenticate_graph_user(api_client, keycloak_oidc) for graph_query, response_text, content_type in ( ( f"visit/nodes/{snapshot_swhid}", f"{snapshot_swhid}\n{origin_swhid}\n", "text/plain", ), ( f"visit/edges/{snapshot_swhid}", f"{snapshot_swhid} {origin_swhid}\n", "text/plain", ), ( f"visit/paths/{snapshot_swhid}", f'["{snapshot_swhid}", "{origin_swhid}"]\n', "application/x-ndjson", ), ): # set two lines response to check resolved origins cache response_text = response_text + response_text requests_mock.get( get_config()["graph"]["server_url"] + graph_query, text=response_text, headers={"Content-Type": content_type, "Transfer-Encoding": "chunked"}, ) url = reverse( "api-1-graph", url_args={"graph_query": graph_query}, query_params={"direction": "backward"}, ) resp = check_http_get_response(api_client, url, status_code=200) assert isinstance(resp, StreamingHttpResponse) assert resp["Content-Type"] == content_type assert b"".join(resp.streaming_content) == response_text.encode() url = reverse( "api-1-graph", url_args={"graph_query": graph_query}, query_params={"direction": "backward", "resolve_origins": "true"}, ) resp = check_http_get_response(api_client, url, status_code=200) assert isinstance(resp, StreamingHttpResponse) assert resp["Content-Type"] == content_type assert ( b"".join(resp.streaming_content) == response_text.replace(origin_swhid, origin["url"]).encode() ) def test_graph_response_resolve_origins_nothing_to_do( api_client, keycloak_oidc, requests_mock ): _authenticate_graph_user(api_client, keycloak_oidc) graph_query = "stats" requests_mock.get( get_config()["graph"]["server_url"] + graph_query, json=_response_json, headers={"Content-Type": "application/json"}, ) url = reverse( "api-1-graph", url_args={"graph_query": graph_query}, query_params={"resolve_origins": "true"}, ) resp = check_http_get_response(api_client, url, status_code=200) assert resp.content_type == "application/json" assert resp.data == _response_json def test_graph_response_invalid_accept_header(api_client): url = reverse( "api-1-graph", url_args={"graph_query": "stats"}, query_params={"resolve_origins": "true"}, ) resp = api_client.get(url, HTTP_ACCEPT="text/html") assert resp.status_code == 406 assert resp.content_type == "application/json" assert resp.data["exception"] == "NotAcceptable" assert resp.data["reason"] == "Could not satisfy the request Accept header." def test_graph_error_response(api_client, keycloak_oidc, requests_mock): _authenticate_graph_user(api_client, keycloak_oidc) graph_query = "foo" error_message = "Not found" content_type = "text/plain" requests_mock.get( get_config()["graph"]["server_url"] + graph_query, text=error_message, headers={"Content-Type": content_type}, status_code=404, ) url = reverse("api-1-graph", url_args={"graph_query": graph_query}) resp = check_http_get_response(api_client, url, status_code=404) assert resp.content_type == content_type assert resp.content == f'"{error_message}"'.encode() @pytest.mark.parametrize( "graph_query, query_params, expected_graph_query_params", [ ("stats", {}, ""), ("stats", {"resolve_origins": "true"}, "resolve_origins=true"), ("stats?a=1", {}, "a=1"), ("stats%3Fb=2", {}, "b=2"), ("stats?a=1", {"resolve_origins": "true"}, "a=1&resolve_origins=true"), ("stats%3Fb=2", {"resolve_origins": "true"}, "b=2&resolve_origins=true"), ("stats/?a=1", {"a": "2"}, "a=1&a=2"), ("stats/%3Fa=1", {"a": "2"}, "a=1&a=2"), ], ) def test_graph_query_params( api_client, keycloak_oidc, requests_mock, graph_query, query_params, expected_graph_query_params, ): _authenticate_graph_user(api_client, keycloak_oidc) requests_mock.get( re.compile(get_config()["graph"]["server_url"]), json=_response_json, headers={"Content-Type": "application/json"}, ) url = reverse( "api-1-graph", url_args={"graph_query": graph_query}, query_params=query_params, ) check_http_get_response(api_client, url, status_code=200) url = requests_mock.request_history[0].url parsed_url = urlparse(url) assert parsed_url.path == f"/graph/{unquote(graph_query).split('?')[0]}" - assert parsed_url.query == expected_graph_query_params + assert expected_graph_query_params in parsed_url.query + + +def test_graph_endpoint_max_edges_settings(api_client, keycloak_oidc, requests_mock): + graph_config = get_config()["graph"] + graph_query = "stats" + url = reverse("api-1-graph", url_args={"graph_query": graph_query}) + requests_mock.get( + get_config()["graph"]["server_url"] + graph_query, + json={}, + headers={"Content-Type": "application/json"}, + ) + + # currently unauthenticated user can only use the graph endpoint from + # Software Heritage VPN + check_http_get_response( + api_client, url, status_code=200, server_name=SWH_WEB_INTERNAL_SERVER_NAME + ) + assert ( + f"max_edges={graph_config['max_edges']['anonymous']}" + in requests_mock.request_history[0].url + ) + + # standard user + _authenticate_graph_user(api_client, keycloak_oidc) + check_http_get_response( + api_client, url, status_code=200, + ) + assert ( + f"max_edges={graph_config['max_edges']['user']}" + in requests_mock.request_history[1].url + ) + + # staff user + _authenticate_graph_user(api_client, keycloak_oidc, is_staff=True) + check_http_get_response( + api_client, url, status_code=200, + ) + assert ( + f"max_edges={graph_config['max_edges']['staff']}" + in requests_mock.request_history[2].url + ) + + +def test_graph_endpoint_max_edges_query_parameter_value( + api_client, keycloak_oidc, requests_mock +): + graph_config = get_config()["graph"] + graph_query = "stats" + + requests_mock.get( + get_config()["graph"]["server_url"] + graph_query, + json={}, + headers={"Content-Type": "application/json"}, + ) + _authenticate_graph_user(api_client, keycloak_oidc) + + max_edges_max_value = graph_config["max_edges"]["user"] + + max_edges = max_edges_max_value // 2 + url = reverse( + "api-1-graph", + url_args={"graph_query": graph_query}, + query_params={"max_edges": max_edges}, + ) + check_http_get_response( + api_client, url, status_code=200, + ) + assert f"max_edges={max_edges}" in requests_mock.request_history[0].url + + max_edges = max_edges_max_value * 2 + url = reverse( + "api-1-graph", + url_args={"graph_query": graph_query}, + query_params={"max_edges": max_edges}, + ) + check_http_get_response( + api_client, url, status_code=200, + ) + assert f"max_edges={max_edges_max_value}" in requests_mock.request_history[1].url