diff --git a/swh/web/auth/views.py b/swh/web/auth/views.py --- a/swh/web/auth/views.py +++ b/swh/web/auth/views.py @@ -32,7 +32,7 @@ gen_oidc_pkce_codes, get_oidc_client, ) -from swh.web.common.exc import BadInputExc, handle_view_exception +from swh.web.common.exc import BadInputExc from swh.web.common.utils import reverse @@ -62,85 +62,76 @@ "prompt": request.GET.get("prompt", ""), } - try: - oidc_client = get_oidc_client() - authorization_url = oidc_client.authorization_url( - redirect_uri, **authorization_url_params - ) + oidc_client = get_oidc_client() + authorization_url = oidc_client.authorization_url( + redirect_uri, **authorization_url_params + ) - return HttpResponseRedirect(authorization_url) - except Exception as e: - return handle_view_exception(request, e) + return HttpResponseRedirect(authorization_url) def oidc_login_complete(request: HttpRequest) -> HttpResponse: """ Django view to finalize login process using OpenID Connect. """ - try: - if "login_data" not in request.session: - raise Exception("Login process has not been initialized.") + if "login_data" not in request.session: + raise Exception("Login process has not been initialized.") - login_data = request.session["login_data"] - next_path = login_data["next_path"] or request.build_absolute_uri("/") + login_data = request.session["login_data"] + next_path = login_data["next_path"] or request.build_absolute_uri("/") - if "error" in request.GET: - if login_data["prompt"] == "none": - # Silent login failed because OIDC session expired. - # Redirect to logout page and inform user. - logout(request) - logout_url = reverse( - "logout", query_params={"next_path": next_path, "remote_user": 1} - ) - return HttpResponseRedirect(logout_url) - return HttpResponseServerError(request.GET["error"]) + if "error" in request.GET: + if login_data["prompt"] == "none": + # Silent login failed because OIDC session expired. + # Redirect to logout page and inform user. + logout(request) + logout_url = reverse( + "logout", query_params={"next_path": next_path, "remote_user": 1} + ) + return HttpResponseRedirect(logout_url) + return HttpResponseServerError(request.GET["error"]) - if "code" not in request.GET or "state" not in request.GET: - raise BadInputExc("Missing query parameters for authentication.") + if "code" not in request.GET or "state" not in request.GET: + raise BadInputExc("Missing query parameters for authentication.") - # get CSRF token returned by OIDC server - state = request.GET["state"] + # get CSRF token returned by OIDC server + state = request.GET["state"] - if state != login_data["state"]: - raise BadInputExc("Wrong CSRF token, aborting login process.") + if state != login_data["state"]: + raise BadInputExc("Wrong CSRF token, aborting login process.") - user = authenticate( - request=request, - code=request.GET["code"], - code_verifier=login_data["code_verifier"], - redirect_uri=login_data["redirect_uri"], - ) + user = authenticate( + request=request, + code=request.GET["code"], + code_verifier=login_data["code_verifier"], + redirect_uri=login_data["redirect_uri"], + ) - if user is None: - raise Exception("User authentication failed.") + if user is None: + raise Exception("User authentication failed.") - login(request, user) + login(request, user) - return HttpResponseRedirect(next_path) - except Exception as e: - return handle_view_exception(request, e) + return HttpResponseRedirect(next_path) def oidc_logout(request: HttpRequest) -> HttpResponse: """ Django view to logout using OpenID Connect. """ - try: - user = request.user - logout(request) - if hasattr(user, "refresh_token"): - oidc_client = get_oidc_client() - user = cast(OIDCUser, user) - refresh_token = cast(str, user.refresh_token) - # end OpenID Connect session - oidc_client.logout(refresh_token) - # remove user data from cache - cache.delete(f"oidc_user_{user.id}") + user = request.user + logout(request) + if hasattr(user, "refresh_token"): + oidc_client = get_oidc_client() + user = cast(OIDCUser, user) + refresh_token = cast(str, user.refresh_token) + # end OpenID Connect session + oidc_client.logout(refresh_token) + # remove user data from cache + cache.delete(f"oidc_user_{user.id}") - logout_url = reverse("logout", query_params={"remote_user": 1}) - return HttpResponseRedirect(request.build_absolute_uri(logout_url)) - except Exception as e: - return handle_view_exception(request, e) + logout_url = reverse("logout", query_params={"remote_user": 1}) + return HttpResponseRedirect(request.build_absolute_uri(logout_url)) @require_http_methods(["POST"]) @@ -162,9 +153,6 @@ except KeycloakError as e: sentry_sdk.capture_exception(e) return HttpResponse(status=e.response_code or 500) - except Exception as e: - sentry_sdk.capture_exception(e) - return HttpResponseServerError(str(e)) def oidc_list_bearer_tokens(request: HttpRequest) -> HttpResponse: @@ -206,9 +194,6 @@ return HttpResponse(decrypted_token.decode("ascii"), content_type="text/plain") except InvalidToken: return HttpResponse(status=401) - except Exception as e: - sentry_sdk.capture_exception(e) - return HttpResponseServerError(str(e)) @require_http_methods(["POST"]) @@ -229,9 +214,6 @@ return HttpResponse(status=200) except InvalidToken: return HttpResponse(status=401) - except Exception as e: - sentry_sdk.capture_exception(e) - return HttpResponseServerError(str(e)) urlpatterns = [ diff --git a/swh/web/browse/browseurls.py b/swh/web/browse/browseurls.py --- a/swh/web/browse/browseurls.py +++ b/swh/web/browse/browseurls.py @@ -3,6 +3,7 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information + from swh.web.common.urlsindex import UrlsIndex diff --git a/swh/web/browse/identifiers.py b/swh/web/browse/identifiers.py --- a/swh/web/browse/identifiers.py +++ b/swh/web/browse/identifiers.py @@ -5,7 +5,6 @@ from django.shortcuts import redirect -from swh.web.common.exc import handle_view_exception from swh.web.common.identifiers import resolve_swhid @@ -15,9 +14,6 @@ The url that points to it is :http:get:`/(swhid)/`. """ - try: - swhid_resolved = resolve_swhid(swhid, query_params=request.GET) - except Exception as exc: - return handle_view_exception(request, exc) + swhid_resolved = resolve_swhid(swhid, query_params=request.GET) return redirect(swhid_resolved["browse_url"]) diff --git a/swh/web/browse/snapshot_context.py b/swh/web/browse/snapshot_context.py --- a/swh/web/browse/snapshot_context.py +++ b/swh/web/browse/snapshot_context.py @@ -32,7 +32,7 @@ request_content, ) from swh.web.common import archive, highlightjs -from swh.web.common.exc import BadInputExc, NotFoundExc, handle_view_exception +from swh.web.common.exc import BadInputExc, NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.origin_visits import get_origin_visit from swh.web.common.typing import ( @@ -670,35 +670,30 @@ """ Django view implementation for browsing a directory in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - path=path, - browse_context="directory", - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - ) + _check_origin_url(snapshot_id, origin_url) - root_directory = snapshot_context["root_directory"] - sha1_git = root_directory - if root_directory and path: - dir_info = archive.lookup_directory_with_path(root_directory, path) - sha1_git = dir_info["target"] + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + path=path, + browse_context="directory", + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + ) - dirs = [] - files = [] - if sha1_git: - dirs, files = get_directory_entries(sha1_git) + root_directory = snapshot_context["root_directory"] + sha1_git = root_directory + if root_directory and path: + dir_info = archive.lookup_directory_with_path(root_directory, path) + sha1_git = dir_info["target"] - except Exception as exc: - return handle_view_exception(request, exc) + dirs = [] + files = [] + if sha1_git: + dirs, files = get_directory_entries(sha1_git) origin_info = snapshot_context["origin_info"] visit_info = snapshot_context["visit_info"] @@ -880,47 +875,42 @@ """ Django view implementation for browsing a content in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - if path is None: - raise BadInputExc("The path of a content must be given as query parameter.") - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - path=path, - browse_context="content", - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - ) + _check_origin_url(snapshot_id, origin_url) - root_directory = snapshot_context["root_directory"] - sha1_git = None - query_string = None - content_data = {} - directory_id = None - split_path = path.split("/") - filename = split_path[-1] - filepath = path[: -len(filename)] - if root_directory: - content_info = archive.lookup_directory_with_path(root_directory, path) - sha1_git = content_info["target"] - query_string = "sha1_git:" + sha1_git - content_data = request_content(query_string, raise_if_unavailable=False) - - if filepath: - dir_info = archive.lookup_directory_with_path(root_directory, filepath) - directory_id = dir_info["target"] - else: - directory_id = root_directory + if path is None: + raise BadInputExc("The path of a content must be given as query parameter.") - except Exception as exc: - return handle_view_exception(request, exc) + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + path=path, + browse_context="content", + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + ) + + root_directory = snapshot_context["root_directory"] + sha1_git = None + query_string = None + content_data = {} + directory_id = None + split_path = path.split("/") + filename = split_path[-1] + filepath = path[: -len(filename)] + if root_directory: + content_info = archive.lookup_directory_with_path(root_directory, path) + sha1_git = content_info["target"] + query_string = "sha1_git:" + sha1_git + content_data = request_content(query_string, raise_if_unavailable=False) + + if filepath: + dir_info = archive.lookup_directory_with_path(root_directory, filepath) + directory_id = dir_info["target"] + else: + directory_id = root_directory revision_id = snapshot_context["revision_id"] origin_info = snapshot_context["origin_info"] @@ -1066,54 +1056,49 @@ Django view implementation for browsing a revision history in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - browse_context="log", - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - ) + _check_origin_url(snapshot_id, origin_url) - revision_id = snapshot_context["revision_id"] - - per_page = int(request.GET.get("per_page", PER_PAGE)) - offset = int(request.GET.get("offset", 0)) - revs_ordering = request.GET.get("revs_ordering", "committer_date") - session_key = "rev_%s_log_ordering_%s" % (revision_id, revs_ordering) - rev_log_session = request.session.get(session_key, None) - rev_log = [] - revs_walker_state = None - if rev_log_session: - rev_log = rev_log_session["rev_log"] - revs_walker_state = rev_log_session["revs_walker_state"] - - if len(rev_log) < offset + per_page: - revs_walker = archive.get_revisions_walker( - revs_ordering, - revision_id, - max_revs=offset + per_page + 1, - state=revs_walker_state, - ) - rev_log += [rev["id"] for rev in revs_walker] - revs_walker_state = revs_walker.export_state() + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + browse_context="log", + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + ) - revs = rev_log[offset : offset + per_page] - revision_log = archive.lookup_revision_multiple(revs) + revision_id = snapshot_context["revision_id"] - request.session[session_key] = { - "rev_log": rev_log, - "revs_walker_state": revs_walker_state, - } + per_page = int(request.GET.get("per_page", PER_PAGE)) + offset = int(request.GET.get("offset", 0)) + revs_ordering = request.GET.get("revs_ordering", "committer_date") + session_key = "rev_%s_log_ordering_%s" % (revision_id, revs_ordering) + rev_log_session = request.session.get(session_key, None) + rev_log = [] + revs_walker_state = None + if rev_log_session: + rev_log = rev_log_session["rev_log"] + revs_walker_state = rev_log_session["revs_walker_state"] + + if len(rev_log) < offset + per_page: + revs_walker = archive.get_revisions_walker( + revs_ordering, + revision_id, + max_revs=offset + per_page + 1, + state=revs_walker_state, + ) + rev_log += [rev["id"] for rev in revs_walker] + revs_walker_state = revs_walker.export_state() + + revs = rev_log[offset : offset + per_page] + revision_log = archive.lookup_revision_multiple(revs) - except Exception as exc: - return handle_view_exception(request, exc) + request.session[session_key] = { + "rev_log": rev_log, + "revs_walker_state": revs_walker_state, + } origin_info = snapshot_context["origin_info"] visit_info = snapshot_context["visit_info"] @@ -1211,41 +1196,36 @@ Django view implementation for browsing a list of branches in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) + _check_origin_url(snapshot_id, origin_url) - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - ) - - branches_bc = request.GET.get("branches_breadcrumbs", "") - branches_bc = branches_bc.split(",") if branches_bc else [] - branches_from = branches_bc[-1] if branches_bc else "" + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + ) - origin_info = snapshot_context["origin_info"] - url_args = snapshot_context["url_args"] - query_params = snapshot_context["query_params"] + branches_bc = request.GET.get("branches_breadcrumbs", "") + branches_bc = branches_bc.split(",") if branches_bc else [] + branches_from = branches_bc[-1] if branches_bc else "" - if origin_info: - browse_view_name = "browse-origin-directory" - else: - browse_view_name = "browse-snapshot-directory" + origin_info = snapshot_context["origin_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] - snapshot = archive.lookup_snapshot( - snapshot_context["snapshot_id"], - branches_from, - PER_PAGE + 1, - target_types=["revision", "alias"], - ) + if origin_info: + browse_view_name = "browse-origin-directory" + else: + browse_view_name = "browse-snapshot-directory" - displayed_branches, _ = process_snapshot_branches(snapshot) + snapshot = archive.lookup_snapshot( + snapshot_context["snapshot_id"], + branches_from, + PER_PAGE + 1, + target_types=["revision", "alias"], + ) - except Exception as exc: - return handle_view_exception(request, exc) + displayed_branches, _ = process_snapshot_branches(snapshot) for branch in displayed_branches: rev_query_params = {} @@ -1325,36 +1305,31 @@ Django view implementation for browsing a list of releases in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - ) + _check_origin_url(snapshot_id, origin_url) - rel_bc = request.GET.get("releases_breadcrumbs", "") - rel_bc = rel_bc.split(",") if rel_bc else [] - rel_from = rel_bc[-1] if rel_bc else "" + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + ) - origin_info = snapshot_context["origin_info"] - url_args = snapshot_context["url_args"] - query_params = snapshot_context["query_params"] + rel_bc = request.GET.get("releases_breadcrumbs", "") + rel_bc = rel_bc.split(",") if rel_bc else [] + rel_from = rel_bc[-1] if rel_bc else "" - snapshot = archive.lookup_snapshot( - snapshot_context["snapshot_id"], - rel_from, - PER_PAGE + 1, - target_types=["release", "alias"], - ) + origin_info = snapshot_context["origin_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] - _, displayed_releases = process_snapshot_branches(snapshot) + snapshot = archive.lookup_snapshot( + snapshot_context["snapshot_id"], + rel_from, + PER_PAGE + 1, + target_types=["release", "alias"], + ) - except Exception as exc: - return handle_view_exception(request, exc) + _, displayed_releases = process_snapshot_branches(snapshot) for release in displayed_releases: query_params_tgt = {"snapshot": snapshot_id} diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -24,7 +24,7 @@ request_content, ) from swh.web.common import archive, highlightjs, query -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import ContentMetadata, SWHObjectInfo from swh.web.common.utils import gen_path_info, reverse, swh_object_icons @@ -42,13 +42,10 @@ The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/raw/` """ - try: - re_encode = bool(strtobool(request.GET.get("re_encode", "false"))) - algo, checksum = query.parse_hash(query_string) - checksum = hash_to_hex(checksum) - content_data = request_content(query_string, max_size=None, re_encode=re_encode) - except Exception as exc: - return handle_view_exception(request, exc) + re_encode = bool(strtobool(request.GET.get("re_encode", "false"))) + algo, checksum = query.parse_hash(query_string) + checksum = hash_to_hex(checksum) + content_data = request_content(query_string, max_size=None, re_encode=re_encode) filename = request.GET.get("filename", None) if not filename: @@ -187,48 +184,44 @@ The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/` """ - try: - algo, checksum = query.parse_hash(query_string) - checksum = hash_to_hex(checksum) - content_data = request_content(query_string, raise_if_unavailable=False) - origin_url = request.GET.get("origin_url") - selected_language = request.GET.get("language") - if not origin_url: - origin_url = request.GET.get("origin") - snapshot_id = request.GET.get("snapshot") - path = request.GET.get("path") - snapshot_context = None - if origin_url is not None or snapshot_id is not None: - try: - snapshot_context = get_snapshot_context( - origin_url=origin_url, - snapshot_id=snapshot_id, - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - path=path, - browse_context=CONTENT, + algo, checksum = query.parse_hash(query_string) + checksum = hash_to_hex(checksum) + content_data = request_content(query_string, raise_if_unavailable=False) + origin_url = request.GET.get("origin_url") + selected_language = request.GET.get("language") + if not origin_url: + origin_url = request.GET.get("origin") + snapshot_id = request.GET.get("snapshot") + path = request.GET.get("path") + snapshot_context = None + if origin_url is not None or snapshot_id is not None: + try: + snapshot_context = get_snapshot_context( + origin_url=origin_url, + snapshot_id=snapshot_id, + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + path=path, + browse_context=CONTENT, + ) + except NotFoundExc as e: + if str(e).startswith("Origin"): + raw_cnt_url = reverse( + "browse-content", url_args={"query_string": query_string} ) - except NotFoundExc as e: - if str(e).startswith("Origin"): - raw_cnt_url = reverse( - "browse-content", url_args={"query_string": query_string} - ) - error_message = ( - "The Software Heritage archive has a content " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. " - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the content " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_cnt_url)) - ) - raise NotFoundExc(error_message) - else: - raise e - except Exception as exc: - return handle_view_exception(request, exc) - + error_message = ( + "The Software Heritage archive has a content " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. " + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the content " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_cnt_url)) + ) + raise NotFoundExc(error_message) + else: + raise e content = None language = None mimetype = None @@ -288,11 +281,8 @@ breadcrumbs.append({"name": filename, "url": None}) if path and root_dir != path: - try: - dir_info = archive.lookup_directory_with_path(root_dir, path) - directory_id = dir_info["target"] - except Exception as exc: - return handle_view_exception(request, exc) + dir_info = archive.lookup_directory_with_path(root_dir, path) + directory_id = dir_info["target"] elif root_dir != path: directory_id = root_dir else: diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -16,7 +16,7 @@ from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import gen_link, get_directory_entries, get_readme_to_display from swh.web.common import archive -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import DirectoryMetadata, SWHObjectInfo from swh.web.common.utils import gen_path_info, reverse, swh_object_icons @@ -24,46 +24,43 @@ def _directory_browse(request, sha1_git, path=None): root_sha1_git = sha1_git - try: - if path: - dir_info = archive.lookup_directory_with_path(sha1_git, path) - sha1_git = dir_info["target"] - - dirs, files = get_directory_entries(sha1_git) - origin_url = request.GET.get("origin_url") - if not origin_url: - origin_url = request.GET.get("origin") - snapshot_id = request.GET.get("snapshot") - snapshot_context = None - if origin_url is not None or snapshot_id is not None: - try: - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - path=path, + if path: + dir_info = archive.lookup_directory_with_path(sha1_git, path) + sha1_git = dir_info["target"] + + dirs, files = get_directory_entries(sha1_git) + origin_url = request.GET.get("origin_url") + if not origin_url: + origin_url = request.GET.get("origin") + snapshot_id = request.GET.get("snapshot") + snapshot_context = None + if origin_url is not None or snapshot_id is not None: + try: + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + path=path, + ) + except NotFoundExc as e: + if str(e).startswith("Origin"): + raw_dir_url = reverse( + "browse-directory", url_args={"sha1_git": sha1_git} ) - except NotFoundExc as e: - if str(e).startswith("Origin"): - raw_dir_url = reverse( - "browse-directory", url_args={"sha1_git": sha1_git} - ) - error_message = ( - "The Software Heritage archive has a directory " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. " - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the directory " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_dir_url)) - ) - raise NotFoundExc(error_message) - else: - raise e - except Exception as exc: - return handle_view_exception(request, exc) + error_message = ( + "The Software Heritage archive has a directory " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. " + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the directory " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_dir_url)) + ) + raise NotFoundExc(error_message) + else: + raise e path_info = gen_path_info(path) diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -15,7 +15,7 @@ get_snapshot_context, ) from swh.web.common import archive -from swh.web.common.exc import BadInputExc, handle_view_exception +from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import format_utc_iso_date, parse_iso8601_date_to_utc, reverse @@ -227,16 +227,12 @@ def _origin_visits_browse(request, origin_url): - try: + if origin_url is None: + raise BadInputExc("An origin URL must be provided as query parameter.") - if origin_url is None: - raise BadInputExc("An origin URL must be provided as query parameter.") - - origin_info = archive.lookup_origin({"url": origin_url}) - origin_visits = get_origin_visits(origin_info) - snapshot_context = get_snapshot_context(origin_url=origin_url) - except Exception as exc: - return handle_view_exception(request, exc) + origin_info = archive.lookup_origin({"url": origin_url}) + origin_visits = get_origin_visits(origin_info) + snapshot_context = get_snapshot_context(origin_url=origin_url) for i, visit in enumerate(origin_visits): url_date = format_utc_iso_date(visit["date"], "%Y-%m-%dT%H:%M:%SZ") diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py --- a/swh/web/browse/views/release.py +++ b/swh/web/browse/views/release.py @@ -20,7 +20,7 @@ gen_snapshot_link, ) from swh.web.common import archive -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import ReleaseMetadata, SWHObjectInfo from swh.web.common.utils import format_utc_iso_date, reverse @@ -38,43 +38,40 @@ The url that points to it is :http:get:`/browse/release/(sha1_git)/`. """ - try: - release = archive.lookup_release(sha1_git) - snapshot_context = {} - origin_info = None - snapshot_id = request.GET.get("snapshot_id") - if not snapshot_id: - snapshot_id = request.GET.get("snapshot") - origin_url = request.GET.get("origin_url") - if not origin_url: - origin_url = request.GET.get("origin") - timestamp = request.GET.get("timestamp") - visit_id = request.GET.get("visit_id") - if origin_url: - try: - snapshot_context = get_snapshot_context( - snapshot_id, origin_url, timestamp, visit_id - ) - except NotFoundExc as e: - raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git}) - error_message = ( - "The Software Heritage archive has a release " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. " - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the release " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_rel_url)) - ) - if str(e).startswith("Origin"): - raise NotFoundExc(error_message) - else: - raise e - origin_info = snapshot_context["origin_info"] - elif snapshot_id: - snapshot_context = get_snapshot_context(snapshot_id) - except Exception as exc: - return handle_view_exception(request, exc) + release = archive.lookup_release(sha1_git) + snapshot_context = {} + origin_info = None + snapshot_id = request.GET.get("snapshot_id") + if not snapshot_id: + snapshot_id = request.GET.get("snapshot") + origin_url = request.GET.get("origin_url") + if not origin_url: + origin_url = request.GET.get("origin") + timestamp = request.GET.get("timestamp") + visit_id = request.GET.get("visit_id") + if origin_url: + try: + snapshot_context = get_snapshot_context( + snapshot_id, origin_url, timestamp, visit_id + ) + except NotFoundExc as e: + raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git}) + error_message = ( + "The Software Heritage archive has a release " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. " + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the release " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_rel_url)) + ) + if str(e).startswith("Origin"): + raise NotFoundExc(error_message) + else: + raise e + origin_info = snapshot_context["origin_info"] + elif snapshot_id: + snapshot_context = get_snapshot_context(snapshot_id) target_url = None if release["target_type"] == REVISION: diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -31,7 +31,7 @@ request_content, ) from swh.web.common import archive -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import RevisionMetadata, SWHObjectInfo from swh.web.common.utils import ( @@ -158,20 +158,17 @@ """ Browse internal endpoint to compute revision diff """ - try: - revision = archive.lookup_revision(sha1_git) - snapshot_context = None - origin_url = request.GET.get("origin_url", None) - if not origin_url: - origin_url = request.GET.get("origin", None) - timestamp = request.GET.get("timestamp", None) - visit_id = request.GET.get("visit_id", None) - if origin_url: - snapshot_context = get_snapshot_context( - origin_url=origin_url, timestamp=timestamp, visit_id=visit_id - ) - except Exception as exc: - return handle_view_exception(request, exc) + revision = archive.lookup_revision(sha1_git) + snapshot_context = None + origin_url = request.GET.get("origin_url", None) + if not origin_url: + origin_url = request.GET.get("origin", None) + timestamp = request.GET.get("timestamp", None) + visit_id = request.GET.get("visit_id", None) + if origin_url: + snapshot_context = get_snapshot_context( + origin_url=origin_url, timestamp=timestamp, visit_id=visit_id + ) changes = archive.diff_revision(sha1_git) changes_msg = _gen_revision_changes_list(revision, changes, snapshot_context) @@ -199,51 +196,48 @@ The url that points to it is :http:get:`/browse/revision/(sha1_git)/log/` """ - try: - origin_url = request.GET.get("origin_url") - snapshot_id = request.GET.get("snapshot") - snapshot_context = None - if origin_url or snapshot_id: - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=request.GET.get("timestamp"), - visit_id=request.GET.get("visit_id"), - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=sha1_git, - ) - per_page = int(request.GET.get("per_page", NB_LOG_ENTRIES)) - offset = int(request.GET.get("offset", 0)) - revs_ordering = request.GET.get("revs_ordering", "committer_date") - session_key = "rev_%s_log_ordering_%s" % (sha1_git, revs_ordering) - rev_log_session = request.session.get(session_key, None) - rev_log = [] - revs_walker_state = None - if rev_log_session: - rev_log = rev_log_session["rev_log"] - revs_walker_state = rev_log_session["revs_walker_state"] - - if len(rev_log) < offset + per_page: - revs_walker = archive.get_revisions_walker( - revs_ordering, - sha1_git, - max_revs=offset + per_page + 1, - state=revs_walker_state, - ) + origin_url = request.GET.get("origin_url") + snapshot_id = request.GET.get("snapshot") + snapshot_context = None + if origin_url or snapshot_id: + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=request.GET.get("timestamp"), + visit_id=request.GET.get("visit_id"), + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=sha1_git, + ) + per_page = int(request.GET.get("per_page", NB_LOG_ENTRIES)) + offset = int(request.GET.get("offset", 0)) + revs_ordering = request.GET.get("revs_ordering", "committer_date") + session_key = "rev_%s_log_ordering_%s" % (sha1_git, revs_ordering) + rev_log_session = request.session.get(session_key, None) + rev_log = [] + revs_walker_state = None + if rev_log_session: + rev_log = rev_log_session["rev_log"] + revs_walker_state = rev_log_session["revs_walker_state"] + + if len(rev_log) < offset + per_page: + revs_walker = archive.get_revisions_walker( + revs_ordering, + sha1_git, + max_revs=offset + per_page + 1, + state=revs_walker_state, + ) - rev_log += [rev["id"] for rev in revs_walker] - revs_walker_state = revs_walker.export_state() + rev_log += [rev["id"] for rev in revs_walker] + revs_walker_state = revs_walker.export_state() - revs = rev_log[offset : offset + per_page] - revision_log = archive.lookup_revision_multiple(revs) + revs = rev_log[offset : offset + per_page] + revision_log = archive.lookup_revision_multiple(revs) - request.session[session_key] = { - "rev_log": rev_log, - "revs_walker_state": revs_walker_state, - } - except Exception as exc: - return handle_view_exception(request, exc) + request.session[session_key] = { + "rev_log": rev_log, + "revs_walker_state": revs_walker_state, + } revs_ordering = request.GET.get("revs_ordering", "") @@ -309,70 +303,65 @@ The url that points to it is :http:get:`/browse/revision/(sha1_git)/`. """ - try: - revision = archive.lookup_revision(sha1_git) - origin_info = None - snapshot_context = None - origin_url = request.GET.get("origin_url") - if not origin_url: - origin_url = request.GET.get("origin") - timestamp = request.GET.get("timestamp") - visit_id = request.GET.get("visit_id") - snapshot_id = request.GET.get("snapshot_id") - if not snapshot_id: - snapshot_id = request.GET.get("snapshot") - path = request.GET.get("path") - dir_id = None - dirs, files = None, None - content_data = {} - if origin_url: - try: - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=visit_id, - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=sha1_git, - ) - except NotFoundExc as e: - raw_rev_url = reverse( - "browse-revision", url_args={"sha1_git": sha1_git} - ) - error_message = ( - "The Software Heritage archive has a revision " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. " - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the revision " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_rev_url)) - ) - if str(e).startswith("Origin"): - raise NotFoundExc(error_message) - else: - raise e - origin_info = snapshot_context["origin_info"] - snapshot_id = snapshot_context["snapshot_id"] - elif snapshot_id: - snapshot_context = get_snapshot_context(snapshot_id) - - if path: - file_info = archive.lookup_directory_with_path(revision["directory"], path) - if file_info["type"] == "dir": - dir_id = file_info["target"] + revision = archive.lookup_revision(sha1_git) + origin_info = None + snapshot_context = None + origin_url = request.GET.get("origin_url") + if not origin_url: + origin_url = request.GET.get("origin") + timestamp = request.GET.get("timestamp") + visit_id = request.GET.get("visit_id") + snapshot_id = request.GET.get("snapshot_id") + if not snapshot_id: + snapshot_id = request.GET.get("snapshot") + path = request.GET.get("path") + dir_id = None + dirs, files = None, None + content_data = {} + if origin_url: + try: + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=visit_id, + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=sha1_git, + ) + except NotFoundExc as e: + raw_rev_url = reverse("browse-revision", url_args={"sha1_git": sha1_git}) + error_message = ( + "The Software Heritage archive has a revision " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. " + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the revision " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_rev_url)) + ) + if str(e).startswith("Origin"): + raise NotFoundExc(error_message) else: - query_string = "sha1_git:" + file_info["target"] - content_data = request_content(query_string, raise_if_unavailable=False) + raise e + origin_info = snapshot_context["origin_info"] + snapshot_id = snapshot_context["snapshot_id"] + elif snapshot_id: + snapshot_context = get_snapshot_context(snapshot_id) + + if path: + file_info = archive.lookup_directory_with_path(revision["directory"], path) + if file_info["type"] == "dir": + dir_id = file_info["target"] else: - dir_id = revision["directory"] + query_string = "sha1_git:" + file_info["target"] + content_data = request_content(query_string, raise_if_unavailable=False) + else: + dir_id = revision["directory"] - if dir_id: - path = "" if path is None else (path + "/") - dirs, files = get_directory_entries(dir_id) - except Exception as exc: - return handle_view_exception(request, exc) + if dir_id: + path = "" if path is None else (path + "/") + dirs, files = get_directory_entries(dir_id) revision_metadata = RevisionMetadata( object_type=REVISION, diff --git a/swh/web/common/exc.py b/swh/web/common/exc.py --- a/swh/web/common/exc.py +++ b/swh/web/common/exc.py @@ -7,7 +7,6 @@ import sentry_sdk -from django.http import HttpResponse from django.shortcuts import render from django.utils.html import escape from django.utils.safestring import mark_safe @@ -126,7 +125,7 @@ return _generate_error_page(request, 500, error_description) -def handle_view_exception(request, exc, html_response=True): +def handle_view_exception(request, exc): """ Function used to generate an error page when an exception was raised inside a swh-web browse view. @@ -142,9 +141,5 @@ error_code = 403 if isinstance(exc, NotFoundExc): error_code = 404 - if html_response: - return _generate_error_page(request, error_code, error_description) - else: - return HttpResponse( - error_description, content_type="text/plain", status=error_code - ) + + return _generate_error_page(request, error_code, error_description) diff --git a/swh/web/common/middlewares.py b/swh/web/common/middlewares.py --- a/swh/web/common/middlewares.py +++ b/swh/web/common/middlewares.py @@ -3,9 +3,11 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information + from htmlmin import minify import sentry_sdk +from swh.web.common.exc import handle_view_exception from swh.web.common.utils import prettify_html @@ -71,3 +73,19 @@ if "RateLimit-Reset" in request.META: resp["X-RateLimit-Reset"] = request.META["RateLimit-Reset"] return resp + + +class ExceptionMiddleware(object): + """ + Django middleware for handling uncaught exception raised when + processing a view. + """ + + def __init__(self, get_response=None): + self.get_response = get_response + + def __call__(self, request): + return self.get_response(request) + + def process_exception(self, request, exception): + return handle_view_exception(request, exception) diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -61,6 +61,7 @@ "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "swh.web.common.middlewares.ThrottlingHeadersMiddleware", + "swh.web.common.middlewares.ExceptionMiddleware", ] # Compress all assets (static ones and dynamically generated html) diff --git a/swh/web/tests/common/test_middlewares.py b/swh/web/tests/common/test_middlewares.py new file mode 100644 --- /dev/null +++ b/swh/web/tests/common/test_middlewares.py @@ -0,0 +1,41 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from hypothesis import given +import pytest + +from django.test import modify_settings + +from swh.web.common.utils import reverse +from swh.web.tests.strategies import snapshot + + +@modify_settings( + MIDDLEWARE={"remove": ["swh.web.common.middlewares.ExceptionMiddleware"]} +) +@given(snapshot()) +def test_exception_middleware_disabled(client, mocker, snapshot): + mock_browse_snapshot_directory = mocker.patch( + "swh.web.browse.views.snapshot.browse_snapshot_directory" + ) + mock_browse_snapshot_directory.side_effect = Exception("Something went wrong") + + url = reverse("browse-snapshot-directory", url_args={"snapshot_id": snapshot}) + + with pytest.raises(Exception, match="Something went wrong"): + client.get(url) + + +@given(snapshot()) +def test_exception_middleware_enabled(client, mocker, snapshot): + mock_browse_snapshot_directory = mocker.patch( + "swh.web.browse.views.snapshot.browse_snapshot_directory" + ) + mock_browse_snapshot_directory.side_effect = Exception("Something went wrong") + + url = reverse("browse-snapshot-directory", url_args={"snapshot_id": snapshot}) + + resp = client.get(url) + assert resp.status_code == 500