Changeset View
Changeset View
Standalone View
Standalone View
swh/web/api/views/graph.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from distutils.util import strtobool | from distutils.util import strtobool | ||||
import json | import json | ||||
from typing import Dict, Iterator, Union | from typing import Dict, Iterator, Union | ||||
import requests | import requests | ||||
from django.http import QueryDict | |||||
from django.http.response import StreamingHttpResponse | from django.http.response import StreamingHttpResponse | ||||
from rest_framework.decorators import renderer_classes | from rest_framework.decorators import renderer_classes | ||||
from rest_framework.renderers import JSONRenderer | from rest_framework.renderers import JSONRenderer | ||||
from rest_framework.request import Request | from rest_framework.request import Request | ||||
from rest_framework.response import Response | from rest_framework.response import Response | ||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | |||||
) -> Union[Response, StreamingHttpResponse]: | ) -> Union[Response, StreamingHttpResponse]: | ||||
if request.get_host() != SWH_WEB_INTERNAL_SERVER_NAME: | if request.get_host() != SWH_WEB_INTERNAL_SERVER_NAME: | ||||
if not bool(request.user and request.user.is_authenticated): | if not bool(request.user and request.user.is_authenticated): | ||||
return Response("Authentication credentials were not provided.", status=401) | return Response("Authentication credentials were not provided.", status=401) | ||||
if not request.user.has_perm(API_GRAPH_PERM): | if not request.user.has_perm(API_GRAPH_PERM): | ||||
return Response( | return Response( | ||||
"You do not have permission to perform this action.", status=403 | "You do not have permission to perform this action.", status=403 | ||||
) | ) | ||||
graph_query_url = get_config()["graph"]["server_url"] | |||||
graph_config = get_config()["graph"] | |||||
graph_query_url = graph_config["server_url"] | |||||
graph_query_url += graph_query | graph_query_url += graph_query | ||||
if request.GET: | query = request.GET.urlencode(safe="/;:") if request.GET else "" | ||||
graph_query_url += "?" + request.GET.urlencode(safe="/;:") | query_dict = QueryDict(query, mutable=True) | ||||
# clamp max_edges query parameter according to authentication | |||||
if request.user.is_staff: | |||||
max_edges = graph_config["max_edges"]["staff"] | |||||
elif request.user.is_authenticated: | |||||
max_edges = graph_config["max_edges"]["user"] | |||||
else: | |||||
max_edges = graph_config["max_edges"]["anonymous"] | |||||
query_dict["max_edges"] = min( | |||||
max_edges, int(query_dict.get("max_edges", max_edges + 1)) | |||||
) | |||||
graph_query_url += "?" + query_dict.urlencode(safe="/;:") | |||||
seirl: I think this is potentially dangerous if an endpoint gets passed with a query parameter.
If… | |||||
anlambertAuthorUnsubmitted Done Inline ActionsThis should not happen as the Web API URL will be parsed by django so graph_query will not contain any ? character and the query parameters will be available in request.GET. Nevertheless, we can have such issue if the ? got quoted to %3F in the Web API URL. I will handle that case in another diff. I also found out that I did not handle really well the error responses coming from the graph API, I will also handle that in a new diff. anlambert: This should not happen as the Web API URL will be parsed by django so `graph_query` will not… | |||||
response = requests.get(graph_query_url, stream=True) | response = requests.get(graph_query_url, stream=True) | ||||
# graph stats and counter endpoint responses are not streamed | # graph stats and counter endpoint responses are not streamed | ||||
if response.headers.get("Transfer-Encoding") != "chunked": | if response.headers.get("Transfer-Encoding") != "chunked": | ||||
return Response( | return Response( | ||||
response.json(), | response.json(), | ||||
status=response.status_code, | status=response.status_code, | ||||
content_type=response.headers["Content-Type"], | content_type=response.headers["Content-Type"], | ||||
) | ) | ||||
Show All 12 Lines |
I think this is potentially dangerous if an endpoint gets passed with a query parameter.
If someone passes something like graph_query == "/graph/visit/nodes?prod=true" to the function, the resulting URL will look like https://graph.internal.softwareheritage.org/graph/visit/nodes?prod=true?max_edges=42 and the max_edges parameter will get silently ignored.
Instead, we could use something from urllib like this: https://stackoverflow.com/a/2506477 to ensure that the argument is properly added to the query (with a & if there is already a ?, or with a ? otherwise).