diff --git a/swh/web/auth/views.py b/swh/web/auth/views.py index c89e82b1..9e3d8615 100644 --- a/swh/web/auth/views.py +++ b/swh/web/auth/views.py @@ -1,261 +1,243 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from typing import Any, Dict, cast import uuid from cryptography.fernet import InvalidToken from keycloak.exceptions import KeycloakError import sentry_sdk from django.conf.urls import url from django.contrib.auth import authenticate, login, logout from django.core.cache import cache from django.core.paginator import Paginator from django.http import HttpRequest from django.http.response import ( HttpResponse, HttpResponseForbidden, HttpResponseRedirect, HttpResponseServerError, JsonResponse, ) from django.views.decorators.http import require_http_methods from swh.web.auth.models import OIDCUser, OIDCUserOfflineTokens from swh.web.auth.utils import ( decrypt_data, encrypt_data, gen_oidc_pkce_codes, get_oidc_client, ) -from swh.web.common.exc import BadInputExc, handle_view_exception +from swh.web.common.exc import BadInputExc from swh.web.common.utils import reverse def oidc_login(request: HttpRequest) -> HttpResponse: """ Django view to initiate login process using OpenID Connect. 
""" # generate a CSRF token state = str(uuid.uuid4()) redirect_uri = reverse("oidc-login-complete", request=request) code_verifier, code_challenge = gen_oidc_pkce_codes() request.session["login_data"] = { "code_verifier": code_verifier, "state": state, "redirect_uri": redirect_uri, "next_path": request.GET.get("next_path", ""), "prompt": request.GET.get("prompt", ""), } authorization_url_params = { "state": state, "code_challenge": code_challenge, "code_challenge_method": "S256", "scope": "openid", "prompt": request.GET.get("prompt", ""), } - try: - oidc_client = get_oidc_client() - authorization_url = oidc_client.authorization_url( - redirect_uri, **authorization_url_params - ) + oidc_client = get_oidc_client() + authorization_url = oidc_client.authorization_url( + redirect_uri, **authorization_url_params + ) - return HttpResponseRedirect(authorization_url) - except Exception as e: - return handle_view_exception(request, e) + return HttpResponseRedirect(authorization_url) def oidc_login_complete(request: HttpRequest) -> HttpResponse: """ Django view to finalize login process using OpenID Connect. """ - try: - if "login_data" not in request.session: - raise Exception("Login process has not been initialized.") + if "login_data" not in request.session: + raise Exception("Login process has not been initialized.") - login_data = request.session["login_data"] - next_path = login_data["next_path"] or request.build_absolute_uri("/") + login_data = request.session["login_data"] + next_path = login_data["next_path"] or request.build_absolute_uri("/") - if "error" in request.GET: - if login_data["prompt"] == "none": - # Silent login failed because OIDC session expired. - # Redirect to logout page and inform user. 
- logout(request) - logout_url = reverse( - "logout", query_params={"next_path": next_path, "remote_user": 1} - ) - return HttpResponseRedirect(logout_url) - return HttpResponseServerError(request.GET["error"]) + if "error" in request.GET: + if login_data["prompt"] == "none": + # Silent login failed because OIDC session expired. + # Redirect to logout page and inform user. + logout(request) + logout_url = reverse( + "logout", query_params={"next_path": next_path, "remote_user": 1} + ) + return HttpResponseRedirect(logout_url) + return HttpResponseServerError(request.GET["error"]) - if "code" not in request.GET or "state" not in request.GET: - raise BadInputExc("Missing query parameters for authentication.") + if "code" not in request.GET or "state" not in request.GET: + raise BadInputExc("Missing query parameters for authentication.") - # get CSRF token returned by OIDC server - state = request.GET["state"] + # get CSRF token returned by OIDC server + state = request.GET["state"] - if state != login_data["state"]: - raise BadInputExc("Wrong CSRF token, aborting login process.") + if state != login_data["state"]: + raise BadInputExc("Wrong CSRF token, aborting login process.") - user = authenticate( - request=request, - code=request.GET["code"], - code_verifier=login_data["code_verifier"], - redirect_uri=login_data["redirect_uri"], - ) + user = authenticate( + request=request, + code=request.GET["code"], + code_verifier=login_data["code_verifier"], + redirect_uri=login_data["redirect_uri"], + ) - if user is None: - raise Exception("User authentication failed.") + if user is None: + raise Exception("User authentication failed.") - login(request, user) + login(request, user) - return HttpResponseRedirect(next_path) - except Exception as e: - return handle_view_exception(request, e) + return HttpResponseRedirect(next_path) def oidc_logout(request: HttpRequest) -> HttpResponse: """ Django view to logout using OpenID Connect. 
""" - try: - user = request.user - logout(request) - if hasattr(user, "refresh_token"): - oidc_client = get_oidc_client() - user = cast(OIDCUser, user) - refresh_token = cast(str, user.refresh_token) - # end OpenID Connect session - oidc_client.logout(refresh_token) - # remove user data from cache - cache.delete(f"oidc_user_{user.id}") + user = request.user + logout(request) + if hasattr(user, "refresh_token"): + oidc_client = get_oidc_client() + user = cast(OIDCUser, user) + refresh_token = cast(str, user.refresh_token) + # end OpenID Connect session + oidc_client.logout(refresh_token) + # remove user data from cache + cache.delete(f"oidc_user_{user.id}") - logout_url = reverse("logout", query_params={"remote_user": 1}) - return HttpResponseRedirect(request.build_absolute_uri(logout_url)) - except Exception as e: - return handle_view_exception(request, e) + logout_url = reverse("logout", query_params={"remote_user": 1}) + return HttpResponseRedirect(request.build_absolute_uri(logout_url)) @require_http_methods(["POST"]) def oidc_generate_bearer_token(request: HttpRequest) -> HttpResponse: if not request.user.is_authenticated or not isinstance(request.user, OIDCUser): return HttpResponseForbidden() try: data = json.loads(request.body.decode("utf-8")) user = cast(OIDCUser, request.user) oidc_client = get_oidc_client() token = oidc_client.offline_token(user.username, data["password"]) password = data["password"].encode() salt = user.sub.encode() encrypted_token = encrypt_data(token.encode(), password, salt) OIDCUserOfflineTokens.objects.create( user_id=str(user.id), offline_token=encrypted_token ).save() return HttpResponse(token, content_type="text/plain") except KeycloakError as e: sentry_sdk.capture_exception(e) return HttpResponse(status=e.response_code or 500) - except Exception as e: - sentry_sdk.capture_exception(e) - return HttpResponseServerError(str(e)) def oidc_list_bearer_tokens(request: HttpRequest) -> HttpResponse: if not request.user.is_authenticated 
or not isinstance(request.user, OIDCUser): return HttpResponseForbidden() tokens = OIDCUserOfflineTokens.objects.filter(user_id=str(request.user.id)) tokens = tokens.order_by("-creation_date") length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 paginator = Paginator(tokens, length) tokens_data = [ {"id": t.id, "creation_date": t.creation_date.isoformat()} for t in paginator.page(int(page)).object_list ] table_data: Dict[str, Any] = {} table_data["recordsTotal"] = len(tokens_data) table_data["draw"] = int(request.GET["draw"]) table_data["data"] = tokens_data table_data["recordsFiltered"] = len(tokens_data) return JsonResponse(table_data) @require_http_methods(["POST"]) def oidc_get_bearer_token(request: HttpRequest) -> HttpResponse: if not request.user.is_authenticated or not isinstance(request.user, OIDCUser): return HttpResponseForbidden() try: data = json.loads(request.body.decode("utf-8")) user = cast(OIDCUser, request.user) token_data = OIDCUserOfflineTokens.objects.get(id=data["token_id"]) password = data["password"].encode() salt = user.sub.encode() decrypted_token = decrypt_data(token_data.offline_token, password, salt) return HttpResponse(decrypted_token.decode("ascii"), content_type="text/plain") except InvalidToken: return HttpResponse(status=401) - except Exception as e: - sentry_sdk.capture_exception(e) - return HttpResponseServerError(str(e)) @require_http_methods(["POST"]) def oidc_revoke_bearer_tokens(request: HttpRequest) -> HttpResponse: if not request.user.is_authenticated or not isinstance(request.user, OIDCUser): return HttpResponseForbidden() try: data = json.loads(request.body.decode("utf-8")) user = cast(OIDCUser, request.user) for token_id in data["token_ids"]: token_data = OIDCUserOfflineTokens.objects.get(id=token_id) password = data["password"].encode() salt = user.sub.encode() decrypted_token = decrypt_data(token_data.offline_token, password, salt) oidc_client = get_oidc_client() 
oidc_client.logout(decrypted_token.decode("ascii")) token_data.delete() return HttpResponse(status=200) except InvalidToken: return HttpResponse(status=401) - except Exception as e: - sentry_sdk.capture_exception(e) - return HttpResponseServerError(str(e)) urlpatterns = [ url(r"^oidc/login/$", oidc_login, name="oidc-login"), url(r"^oidc/login-complete/$", oidc_login_complete, name="oidc-login-complete"), url(r"^oidc/logout/$", oidc_logout, name="oidc-logout"), url( r"^oidc/generate-bearer-token/$", oidc_generate_bearer_token, name="oidc-generate-bearer-token", ), url( r"^oidc/list-bearer-token/$", oidc_list_bearer_tokens, name="oidc-list-bearer-tokens", ), url( r"^oidc/get-bearer-token/$", oidc_get_bearer_token, name="oidc-get-bearer-token", ), url( r"^oidc/revoke-bearer-tokens/$", oidc_revoke_bearer_tokens, name="oidc-revoke-bearer-tokens", ), ] diff --git a/swh/web/browse/identifiers.py b/swh/web/browse/identifiers.py index 3e3c129b..a9e75e61 100644 --- a/swh/web/browse/identifiers.py +++ b/swh/web/browse/identifiers.py @@ -1,23 +1,19 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import redirect -from swh.web.common.exc import handle_view_exception from swh.web.common.identifiers import resolve_swhid def swhid_browse(request, swhid): """ Django view enabling to browse the archive using :ref:``. The url that points to it is :http:get:`/(swhid)/`. 
""" - try: - swhid_resolved = resolve_swhid(swhid, query_params=request.GET) - except Exception as exc: - return handle_view_exception(request, exc) + swhid_resolved = resolve_swhid(swhid, query_params=request.GET) return redirect(swhid_resolved["browse_url"]) diff --git a/swh/web/browse/snapshot_context.py b/swh/web/browse/snapshot_context.py index 69ec005e..4395aeb2 100644 --- a/swh/web/browse/snapshot_context.py +++ b/swh/web/browse/snapshot_context.py @@ -1,1452 +1,1427 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information # Utility module for browsing the archive in a snapshot context. from collections import defaultdict from copy import copy from typing import Any, Dict, List, Optional, Tuple from django.core.cache import cache from django.shortcuts import render from django.template.defaultfilters import filesizeformat from django.utils.html import escape from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT, swhid from swh.model.model import Snapshot from swh.web.browse.utils import ( content_display_max_size, format_log_entries, gen_content_link, gen_directory_link, gen_release_link, gen_revision_link, gen_revision_log_link, gen_revision_url, gen_snapshot_link, get_directory_entries, get_readme_to_display, prepare_content_for_display, request_content, ) from swh.web.common import archive, highlightjs -from swh.web.common.exc import BadInputExc, NotFoundExc, handle_view_exception +from swh.web.common.exc import BadInputExc, NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.origin_visits import get_origin_visit from swh.web.common.typing import ( ContentMetadata, DirectoryMetadata, OriginInfo, SnapshotBranchInfo, SnapshotContext, SnapshotReleaseInfo, SWHObjectInfo, ) from swh.web.common.utils 
import ( format_utc_iso_date, gen_path_info, reverse, swh_object_icons, ) from swh.web.config import get_config _empty_snapshot_id = Snapshot(branches={}).id.hex() def _get_branch(branches, branch_name, snapshot_id): """ Utility function to get a specific branch from a branches list. Its purpose is to get the default HEAD branch as some software origin (e.g those with svn type) does not have it. In that latter case, check if there is a master branch instead and returns it. """ filtered_branches = [b for b in branches if b["name"] == branch_name] if filtered_branches: return filtered_branches[0] elif branch_name == "HEAD": filtered_branches = [b for b in branches if b["name"].endswith("master")] if filtered_branches: return filtered_branches[0] elif branches: return branches[0] else: # case where a large branches list has been truncated snp = archive.lookup_snapshot( snapshot_id, branches_from=branch_name, branches_count=1, target_types=["revision", "alias"], ) snp_branch, _ = process_snapshot_branches(snp) if snp_branch and snp_branch[0]["name"] == branch_name: branches.append(snp_branch[0]) return snp_branch[0] def _get_release(releases, release_name, snapshot_id): """ Utility function to get a specific release from a releases list. Returns None if the release can not be found in the list. 
""" filtered_releases = [r for r in releases if r["name"] == release_name] if filtered_releases: return filtered_releases[0] else: # case where a large branches list has been truncated try: # git origins have specific branches for releases snp = archive.lookup_snapshot( snapshot_id, branches_from=f"refs/tags/{release_name}", branches_count=1, target_types=["release"], ) except NotFoundExc: snp = archive.lookup_snapshot( snapshot_id, branches_from=release_name, branches_count=1, target_types=["release"], ) _, snp_release = process_snapshot_branches(snp) if snp_release and snp_release[0]["name"] == release_name: releases.append(snp_release[0]) return snp_release[0] def _branch_not_found( branch_type, branch, snapshot_id, snapshot_sizes, origin_info, timestamp, visit_id ): """ Utility function to raise an exception when a specified branch/release can not be found. """ if branch_type == "branch": branch_type = "Branch" branch_type_plural = "branches" target_type = "revision" else: branch_type = "Release" branch_type_plural = "releases" target_type = "release" if snapshot_id and snapshot_sizes[target_type] == 0: msg = "Snapshot with id %s has an empty list" " of %s!" % ( snapshot_id, branch_type_plural, ) elif snapshot_id: msg = "%s %s for snapshot with id %s" " not found!" % ( branch_type, branch, snapshot_id, ) elif visit_id and snapshot_sizes[target_type] == 0: msg = ( "Origin with url %s" " for visit with id %s has an empty list" " of %s!" % (origin_info["url"], visit_id, branch_type_plural) ) elif visit_id: msg = ( "%s %s associated to visit with" " id %s for origin with url %s" " not found!" % (branch_type, branch, visit_id, origin_info["url"]) ) elif snapshot_sizes[target_type] == 0: msg = ( "Origin with url %s" " for visit with timestamp %s has an empty list" " of %s!" % (origin_info["url"], timestamp, branch_type_plural) ) else: msg = ( "%s %s associated to visit with" " timestamp %s for origin with " "url %s not found!" 
% (branch_type, branch, timestamp, origin_info["url"]) ) raise NotFoundExc(escape(msg)) def process_snapshot_branches( snapshot: Dict[str, Any] ) -> Tuple[List[SnapshotBranchInfo], List[SnapshotReleaseInfo]]: """ Process a dictionary describing snapshot branches: extract those targeting revisions and releases, put them in two different lists, then sort those lists in lexicographical order of the branches' names. Args: snapshot: A dict describing a snapshot as returned for instance by :func:`swh.web.common.archive.lookup_snapshot` Returns: A tuple whose first member is the sorted list of branches targeting revisions and second member the sorted list of branches targeting releases """ snapshot_branches = snapshot["branches"] branches: Dict[str, SnapshotBranchInfo] = {} branch_aliases: Dict[str, str] = {} releases: Dict[str, SnapshotReleaseInfo] = {} revision_to_branch = defaultdict(set) revision_to_release = defaultdict(set) release_to_branch = defaultdict(set) for branch_name, target in snapshot_branches.items(): if not target: # FIXME: display branches with an unknown target anyway continue target_id = target["target"] target_type = target["target_type"] if target_type == "revision": branches[branch_name] = SnapshotBranchInfo( name=branch_name, revision=target_id, date=None, directory=None, message=None, url=None, ) revision_to_branch[target_id].add(branch_name) elif target_type == "release": release_to_branch[target_id].add(branch_name) elif target_type == "alias": branch_aliases[branch_name] = target_id # FIXME: handle pointers to other object types def _add_release_info(branch, release): releases[branch] = SnapshotReleaseInfo( name=release["name"], branch_name=branch, date=format_utc_iso_date(release["date"]), directory=None, id=release["id"], message=release["message"], target_type=release["target_type"], target=release["target"], url=None, ) def _add_branch_info(branch, revision): branches[branch] = SnapshotBranchInfo( name=branch, revision=revision["id"], 
directory=revision["directory"], date=format_utc_iso_date(revision["date"]), message=revision["message"], url=None, ) releases_info = archive.lookup_release_multiple(release_to_branch.keys()) for release in releases_info: if release is None: continue branches_to_update = release_to_branch[release["id"]] for branch in branches_to_update: _add_release_info(branch, release) if release["target_type"] == "revision": revision_to_release[release["target"]].update(branches_to_update) revisions = archive.lookup_revision_multiple( set(revision_to_branch.keys()) | set(revision_to_release.keys()) ) for revision in revisions: if not revision: continue for branch in revision_to_branch[revision["id"]]: _add_branch_info(branch, revision) for release_id in revision_to_release[revision["id"]]: releases[release_id]["directory"] = revision["directory"] for branch_alias, branch_target in branch_aliases.items(): if branch_target in branches: branches[branch_alias] = copy(branches[branch_target]) else: snp = archive.lookup_snapshot( snapshot["id"], branches_from=branch_target, branches_count=1 ) if snp and branch_target in snp["branches"]: if snp["branches"][branch_target] is None: continue target_type = snp["branches"][branch_target]["target_type"] target = snp["branches"][branch_target]["target"] if target_type == "revision": branches[branch_alias] = snp["branches"][branch_target] revision = archive.lookup_revision(target) _add_branch_info(branch_alias, revision) elif target_type == "release": release = archive.lookup_release(target) _add_release_info(branch_alias, release) if branch_alias in branches: branches[branch_alias]["name"] = branch_alias ret_branches = list(sorted(branches.values(), key=lambda b: b["name"])) ret_releases = list(sorted(releases.values(), key=lambda b: b["name"])) return ret_branches, ret_releases def get_snapshot_content( snapshot_id: str, ) -> Tuple[List[SnapshotBranchInfo], List[SnapshotReleaseInfo]]: """Returns the lists of branches and releases associated 
to a swh snapshot. That list is put in cache in order to speedup the navigation in the swh-web/browse ui. .. warning:: At most 1000 branches contained in the snapshot will be returned for performance reasons. Args: snapshot_id: hexadecimal representation of the snapshot identifier Returns: A tuple with two members. The first one is a list of dict describing the snapshot branches. The second one is a list of dict describing the snapshot releases. Raises: NotFoundExc if the snapshot does not exist """ cache_entry_id = "swh_snapshot_%s" % snapshot_id cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry["branches"], cache_entry["releases"] branches: List[SnapshotBranchInfo] = [] releases: List[SnapshotReleaseInfo] = [] snapshot_content_max_size = get_config()["snapshot_content_max_size"] if snapshot_id: snapshot = archive.lookup_snapshot( snapshot_id, branches_count=snapshot_content_max_size ) branches, releases = process_snapshot_branches(snapshot) cache.set(cache_entry_id, {"branches": branches, "releases": releases,}) return branches, releases def get_origin_visit_snapshot( origin_info: OriginInfo, visit_ts: Optional[str] = None, visit_id: Optional[int] = None, snapshot_id: Optional[str] = None, ) -> Tuple[List[SnapshotBranchInfo], List[SnapshotReleaseInfo]]: """Returns the lists of branches and releases associated to an origin for a given visit. The visit is expressed by either: * a snapshot identifier * a timestamp, if no visit with that exact timestamp is found, the closest one from the provided timestamp will be used. If no visit parameter is provided, it returns the list of branches found for the latest visit. That list is put in cache in order to speedup the navigation in the swh-web/browse ui. .. warning:: At most 1000 branches contained in the snapshot will be returned for performance reasons. 
Args: origin_info: a dict filled with origin information visit_ts: an ISO 8601 datetime string to parse visit_id: visit id for disambiguation in case several visits have the same timestamp snapshot_id: if provided, visit associated to the snapshot will be processed Returns: A tuple with two members. The first one is a list of dict describing the origin branches for the given visit. The second one is a list of dict describing the origin releases for the given visit. Raises: NotFoundExc if the origin or its visit are not found """ visit_info = get_origin_visit(origin_info, visit_ts, visit_id, snapshot_id) return get_snapshot_content(visit_info["snapshot"]) def get_snapshot_context( snapshot_id: Optional[str] = None, origin_url: Optional[str] = None, timestamp: Optional[str] = None, visit_id: Optional[int] = None, branch_name: Optional[str] = None, release_name: Optional[str] = None, revision_id: Optional[str] = None, path: Optional[str] = None, browse_context: str = "directory", ) -> SnapshotContext: """ Utility function to compute relevant information when navigating the archive in a snapshot context. The snapshot is either referenced by its id or it will be retrieved from an origin visit. Args: snapshot_id: hexadecimal representation of a snapshot identifier origin_url: an origin_url timestamp: a datetime string for retrieving the closest visit of the origin visit_id: optional visit id for disambiguation in case of several visits with the same timestamp branch_name: optional branch name set when browsing the snapshot in that scope (will default to "HEAD" if not provided) release_name: optional release name set when browsing the snapshot in that scope revision_id: optional revision identifier set when browsing the snapshot in that scope path: optional path of the object currently browsed in the snapshot browse_context: indicates which type of object is currently browsed Returns: A dict filled with snapshot context information. 
Raises: swh.web.common.exc.NotFoundExc: if no snapshot is found for the visit of an origin. """ assert origin_url is not None or snapshot_id is not None origin_info = None visit_info = None url_args = {} query_params: Dict[str, Any] = {} origin_visits_url = None if origin_url: if visit_id is not None: query_params["visit_id"] = visit_id elif snapshot_id is not None: query_params["snapshot"] = snapshot_id origin_info = archive.lookup_origin({"url": origin_url}) visit_info = get_origin_visit(origin_info, timestamp, visit_id, snapshot_id) formatted_date = format_utc_iso_date(visit_info["date"]) visit_info["formatted_date"] = formatted_date snapshot_id = visit_info["snapshot"] if not snapshot_id: raise NotFoundExc( "No snapshot associated to the visit of origin " "%s on %s" % (escape(origin_url), formatted_date) ) # provided timestamp is not necessarily equals to the one # of the retrieved visit, so get the exact one in order # to use it in the urls generated below if timestamp: timestamp = visit_info["date"] branches, releases = get_origin_visit_snapshot( origin_info, timestamp, visit_id, snapshot_id ) query_params["origin_url"] = origin_info["url"] origin_visits_url = reverse( "browse-origin-visits", query_params={"origin_url": origin_info["url"]} ) if timestamp is not None: query_params["timestamp"] = format_utc_iso_date( timestamp, "%Y-%m-%dT%H:%M:%SZ" ) visit_url = reverse("browse-origin-directory", query_params=query_params) visit_info["url"] = visit_url branches_url = reverse("browse-origin-branches", query_params=query_params) releases_url = reverse("browse-origin-releases", query_params=query_params) else: assert snapshot_id is not None branches, releases = get_snapshot_content(snapshot_id) url_args = {"snapshot_id": snapshot_id} branches_url = reverse("browse-snapshot-branches", url_args=url_args) releases_url = reverse("browse-snapshot-releases", url_args=url_args) releases = list(reversed(releases)) snapshot_sizes = 
archive.lookup_snapshot_sizes(snapshot_id) is_empty = sum(snapshot_sizes.values()) == 0 swh_snp_id = swhid("snapshot", snapshot_id) if visit_info: timestamp = format_utc_iso_date(visit_info["date"]) if origin_info: browse_view_name = f"browse-origin-{browse_context}" else: browse_view_name = f"browse-snapshot-{browse_context}" release_id = None root_directory = None snapshot_total_size = sum(snapshot_sizes.values()) if path is not None: query_params["path"] = path if snapshot_total_size and revision_id is not None: revision = archive.lookup_revision(revision_id) root_directory = revision["directory"] branches.append( SnapshotBranchInfo( name=revision_id, revision=revision_id, directory=root_directory, date=revision["date"], message=revision["message"], url=None, ) ) branch_name = revision_id query_params["revision"] = revision_id elif snapshot_total_size and release_name: release = _get_release(releases, release_name, snapshot_id) if release is None: _branch_not_found( "release", release_name, snapshot_id, snapshot_sizes, origin_info, timestamp, visit_id, ) else: root_directory = release["directory"] revision_id = release["target"] release_id = release["id"] query_params["release"] = release_name elif snapshot_total_size: if branch_name: query_params["branch"] = branch_name branch = _get_branch(branches, branch_name or "HEAD", snapshot_id) if branch is None: _branch_not_found( "branch", branch_name, snapshot_id, snapshot_sizes, origin_info, timestamp, visit_id, ) else: branch_name = branch["name"] revision_id = branch["revision"] root_directory = branch["directory"] for b in branches: branch_query_params = dict(query_params) branch_query_params.pop("release", None) if b["name"] != b["revision"]: branch_query_params.pop("revision", None) branch_query_params["branch"] = b["name"] b["url"] = reverse( browse_view_name, url_args=url_args, query_params=branch_query_params ) for r in releases: release_query_params = dict(query_params) release_query_params.pop("branch", 
None) release_query_params.pop("revision", None) release_query_params["release"] = r["name"] r["url"] = reverse( browse_view_name, url_args=url_args, query_params=release_query_params, ) revision_info = None if revision_id: try: revision_info = archive.lookup_revision(revision_id) except NotFoundExc: pass else: revision_info["date"] = format_utc_iso_date(revision_info["date"]) revision_info["committer_date"] = format_utc_iso_date( revision_info["committer_date"] ) if revision_info["message"]: message_lines = revision_info["message"].split("\n") revision_info["message_header"] = message_lines[0] else: revision_info["message_header"] = "" snapshot_context = SnapshotContext( branch=branch_name, branches=branches, branches_url=branches_url, is_empty=is_empty, origin_info=origin_info, origin_visits_url=origin_visits_url, release=release_name, release_id=release_id, query_params=query_params, releases=releases, releases_url=releases_url, revision_id=revision_id, revision_info=revision_info, root_directory=root_directory, snapshot_id=snapshot_id, snapshot_sizes=snapshot_sizes, snapshot_swhid=swh_snp_id, url_args=url_args, visit_info=visit_info, ) if revision_info: revision_info["revision_url"] = gen_revision_url(revision_id, snapshot_context) return snapshot_context def _build_breadcrumbs(snapshot_context: SnapshotContext, path: str): origin_info = snapshot_context["origin_info"] url_args = snapshot_context["url_args"] query_params = dict(snapshot_context["query_params"]) root_directory = snapshot_context["root_directory"] path_info = gen_path_info(path) if origin_info: browse_view_name = "browse-origin-directory" else: browse_view_name = "browse-snapshot-directory" breadcrumbs = [] if root_directory: query_params.pop("path", None) breadcrumbs.append( { "name": root_directory[:7], "url": reverse( browse_view_name, url_args=url_args, query_params=query_params ), } ) for pi in path_info: query_params["path"] = pi["path"] breadcrumbs.append( { "name": pi["name"], "url": 
reverse( browse_view_name, url_args=url_args, query_params=query_params ), } ) return breadcrumbs def _check_origin_url(snapshot_id, origin_url): if snapshot_id is None and origin_url is None: raise BadInputExc("An origin URL must be provided as query parameter.") def browse_snapshot_directory( request, snapshot_id=None, origin_url=None, timestamp=None, path=None ): """ Django view implementation for browsing a directory in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - path=path, - browse_context="directory", - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - ) + _check_origin_url(snapshot_id, origin_url) - root_directory = snapshot_context["root_directory"] - sha1_git = root_directory - if root_directory and path: - dir_info = archive.lookup_directory_with_path(root_directory, path) - sha1_git = dir_info["target"] + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + path=path, + browse_context="directory", + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + ) - dirs = [] - files = [] - if sha1_git: - dirs, files = get_directory_entries(sha1_git) + root_directory = snapshot_context["root_directory"] + sha1_git = root_directory + if root_directory and path: + dir_info = archive.lookup_directory_with_path(root_directory, path) + sha1_git = dir_info["target"] - except Exception as exc: - return handle_view_exception(request, exc) + dirs = [] + files = [] + if sha1_git: + dirs, files = get_directory_entries(sha1_git) origin_info = snapshot_context["origin_info"] visit_info = snapshot_context["visit_info"] url_args = 
snapshot_context["url_args"] query_params = dict(snapshot_context["query_params"]) revision_id = snapshot_context["revision_id"] snapshot_id = snapshot_context["snapshot_id"] if origin_info: browse_view_name = "browse-origin-directory" else: browse_view_name = "browse-snapshot-directory" breadcrumbs = _build_breadcrumbs(snapshot_context, path) path = "" if path is None else (path + "/") for d in dirs: if d["type"] == "rev": d["url"] = reverse("browse-revision", url_args={"sha1_git": d["target"]}) else: query_params["path"] = path + d["name"] d["url"] = reverse( browse_view_name, url_args=url_args, query_params=query_params ) sum_file_sizes = 0 readmes = {} if origin_info: browse_view_name = "browse-origin-content" else: browse_view_name = "browse-snapshot-content" for f in files: query_params["path"] = path + f["name"] f["url"] = reverse( browse_view_name, url_args=url_args, query_params=query_params ) if f["length"] is not None: sum_file_sizes += f["length"] f["length"] = filesizeformat(f["length"]) if f["name"].lower().startswith("readme"): readmes[f["name"]] = f["checksums"]["sha1"] readme_name, readme_url, readme_html = get_readme_to_display(readmes) if origin_info: browse_view_name = "browse-origin-log" else: browse_view_name = "browse-snapshot-log" history_url = None if snapshot_id != _empty_snapshot_id: query_params.pop("path", None) history_url = reverse( browse_view_name, url_args=url_args, query_params=query_params ) nb_files = None nb_dirs = None dir_path = None if root_directory: nb_files = len(files) nb_dirs = len(dirs) sum_file_sizes = filesizeformat(sum_file_sizes) dir_path = "/" + path browse_dir_link = gen_directory_link(sha1_git) browse_rev_link = gen_revision_link(revision_id) browse_snp_link = gen_snapshot_link(snapshot_id) revision_found = True if sha1_git is None and revision_id is not None: try: archive.lookup_revision(revision_id) except NotFoundExc: revision_found = False swh_objects = [ SWHObjectInfo(object_type=DIRECTORY, 
object_id=sha1_git), SWHObjectInfo(object_type=REVISION, object_id=revision_id), SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id), ] visit_date = None visit_type = None if visit_info: visit_date = format_utc_iso_date(visit_info["date"]) visit_type = visit_info["type"] release_id = snapshot_context["release_id"] browse_rel_link = None if release_id: swh_objects.append(SWHObjectInfo(object_type=RELEASE, object_id=release_id)) browse_rel_link = gen_release_link(release_id) dir_metadata = DirectoryMetadata( object_type=DIRECTORY, object_id=sha1_git, directory=sha1_git, directory_url=browse_dir_link, nb_files=nb_files, nb_dirs=nb_dirs, sum_file_sizes=sum_file_sizes, root_directory=root_directory, path=dir_path, revision=revision_id, revision_found=revision_found, revision_url=browse_rev_link, release=release_id, release_url=browse_rel_link, snapshot=snapshot_id, snapshot_url=browse_snp_link, origin_url=origin_url, visit_date=visit_date, visit_type=visit_type, ) vault_cooking = { "directory_context": True, "directory_id": sha1_git, "revision_context": True, "revision_id": revision_id, } swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata) dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/" context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: context_found = "origin: %s" % origin_info["url"] heading = "Directory - %s - %s - %s" % ( dir_path, snapshot_context["branch"], context_found, ) top_right_link = None if not snapshot_context["is_empty"]: top_right_link = { "url": history_url, "icon": swh_object_icons["revisions history"], "text": "History", } return render( request, "browse/directory.html", { "heading": heading, "swh_object_name": "Directory", "swh_object_metadata": dir_metadata, "dirs": dirs, "files": files, "breadcrumbs": breadcrumbs if root_directory else [], "top_right_link": top_right_link, "readme_name": readme_name, "readme_url": readme_url, "readme_html": readme_html, "snapshot_context": 
snapshot_context, "vault_cooking": vault_cooking, "show_actions": True, "swhids_info": swhids_info, }, ) def browse_snapshot_content( request, snapshot_id=None, origin_url=None, timestamp=None, path=None, selected_language=None, ): """ Django view implementation for browsing a content in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - if path is None: - raise BadInputExc("The path of a content must be given as query parameter.") - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - path=path, - browse_context="content", - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - ) + _check_origin_url(snapshot_id, origin_url) - root_directory = snapshot_context["root_directory"] - sha1_git = None - query_string = None - content_data = {} - directory_id = None - split_path = path.split("/") - filename = split_path[-1] - filepath = path[: -len(filename)] - if root_directory: - content_info = archive.lookup_directory_with_path(root_directory, path) - sha1_git = content_info["target"] - query_string = "sha1_git:" + sha1_git - content_data = request_content(query_string, raise_if_unavailable=False) - - if filepath: - dir_info = archive.lookup_directory_with_path(root_directory, filepath) - directory_id = dir_info["target"] - else: - directory_id = root_directory + if path is None: + raise BadInputExc("The path of a content must be given as query parameter.") - except Exception as exc: - return handle_view_exception(request, exc) + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + path=path, + browse_context="content", + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + ) + + 
root_directory = snapshot_context["root_directory"] + sha1_git = None + query_string = None + content_data = {} + directory_id = None + split_path = path.split("/") + filename = split_path[-1] + filepath = path[: -len(filename)] + if root_directory: + content_info = archive.lookup_directory_with_path(root_directory, path) + sha1_git = content_info["target"] + query_string = "sha1_git:" + sha1_git + content_data = request_content(query_string, raise_if_unavailable=False) + + if filepath: + dir_info = archive.lookup_directory_with_path(root_directory, filepath) + directory_id = dir_info["target"] + else: + directory_id = root_directory revision_id = snapshot_context["revision_id"] origin_info = snapshot_context["origin_info"] visit_info = snapshot_context["visit_info"] snapshot_id = snapshot_context["snapshot_id"] if content_data.get("raw_data") is not None: content_display_data = prepare_content_for_display( content_data["raw_data"], content_data["mimetype"], path ) content_data.update(content_display_data) # Override language with user-selected language if selected_language is not None: content_data["language"] = selected_language available_languages = None if content_data.get("mimetype") is not None and "text/" in content_data["mimetype"]: available_languages = highlightjs.get_supported_languages() breadcrumbs = _build_breadcrumbs(snapshot_context, filepath) breadcrumbs.append({"name": filename, "url": None}) browse_content_link = gen_content_link(sha1_git) content_raw_url = None if query_string: content_raw_url = reverse( "browse-content-raw", url_args={"query_string": query_string}, query_params={"filename": filename}, ) browse_rev_link = gen_revision_link(revision_id) browse_dir_link = gen_directory_link(directory_id) content_checksums = content_data.get("checksums", {}) swh_objects = [ SWHObjectInfo(object_type=CONTENT, object_id=content_checksums.get("sha1_git")), SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id), 
SWHObjectInfo(object_type=REVISION, object_id=revision_id), SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id), ] visit_date = None visit_type = None if visit_info: visit_date = format_utc_iso_date(visit_info["date"]) visit_type = visit_info["type"] release_id = snapshot_context["release_id"] browse_rel_link = None if release_id: swh_objects.append(SWHObjectInfo(object_type=RELEASE, object_id=release_id)) browse_rel_link = gen_release_link(release_id) content_metadata = ContentMetadata( object_type=CONTENT, object_id=content_checksums.get("sha1_git"), sha1=content_checksums.get("sha1"), sha1_git=content_checksums.get("sha1_git"), sha256=content_checksums.get("sha256"), blake2s256=content_checksums.get("blake2s256"), content_url=browse_content_link, mimetype=content_data.get("mimetype"), encoding=content_data.get("encoding"), size=filesizeformat(content_data.get("length", 0)), language=content_data.get("language"), root_directory=root_directory, path=f"/{filepath}", filename=filename, directory=directory_id, directory_url=browse_dir_link, revision=revision_id, revision_url=browse_rev_link, release=release_id, release_url=browse_rel_link, snapshot=snapshot_id, snapshot_url=gen_snapshot_link(snapshot_id), origin_url=origin_url, visit_date=visit_date, visit_type=visit_type, ) swhids_info = get_swhids_info(swh_objects, snapshot_context, content_metadata) content_path = "/".join([bc["name"] for bc in breadcrumbs]) context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: context_found = "origin: %s" % origin_info["url"] heading = "Content - %s - %s - %s" % ( content_path, snapshot_context["branch"], context_found, ) top_right_link = None if not snapshot_context["is_empty"]: top_right_link = { "url": content_raw_url, "icon": swh_object_icons["content"], "text": "Raw File", } return render( request, "browse/content.html", { "heading": heading, "swh_object_name": "Content", "swh_object_metadata": content_metadata, "content": 
content_data.get("content_data"), "content_size": content_data.get("length"), "max_content_size": content_display_max_size, "filename": filename, "encoding": content_data.get("encoding"), "mimetype": content_data.get("mimetype"), "language": content_data.get("language"), "available_languages": available_languages, "breadcrumbs": breadcrumbs if root_directory else [], "top_right_link": top_right_link, "snapshot_context": snapshot_context, "vault_cooking": None, "show_actions": True, "swhids_info": swhids_info, "error_code": content_data.get("error_code"), "error_message": content_data.get("error_message"), "error_description": content_data.get("error_description"), }, status=content_data.get("error_code", 200), ) PER_PAGE = 100 def browse_snapshot_log(request, snapshot_id=None, origin_url=None, timestamp=None): """ Django view implementation for browsing a revision history in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - browse_context="log", - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - ) + _check_origin_url(snapshot_id, origin_url) - revision_id = snapshot_context["revision_id"] - - per_page = int(request.GET.get("per_page", PER_PAGE)) - offset = int(request.GET.get("offset", 0)) - revs_ordering = request.GET.get("revs_ordering", "committer_date") - session_key = "rev_%s_log_ordering_%s" % (revision_id, revs_ordering) - rev_log_session = request.session.get(session_key, None) - rev_log = [] - revs_walker_state = None - if rev_log_session: - rev_log = rev_log_session["rev_log"] - revs_walker_state = rev_log_session["revs_walker_state"] - - if len(rev_log) < offset + per_page: - revs_walker = archive.get_revisions_walker( - revs_ordering, - revision_id, - max_revs=offset + per_page + 1, - 
state=revs_walker_state, - ) - rev_log += [rev["id"] for rev in revs_walker] - revs_walker_state = revs_walker.export_state() + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + browse_context="log", + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + ) - revs = rev_log[offset : offset + per_page] - revision_log = archive.lookup_revision_multiple(revs) + revision_id = snapshot_context["revision_id"] - request.session[session_key] = { - "rev_log": rev_log, - "revs_walker_state": revs_walker_state, - } + per_page = int(request.GET.get("per_page", PER_PAGE)) + offset = int(request.GET.get("offset", 0)) + revs_ordering = request.GET.get("revs_ordering", "committer_date") + session_key = "rev_%s_log_ordering_%s" % (revision_id, revs_ordering) + rev_log_session = request.session.get(session_key, None) + rev_log = [] + revs_walker_state = None + if rev_log_session: + rev_log = rev_log_session["rev_log"] + revs_walker_state = rev_log_session["revs_walker_state"] + + if len(rev_log) < offset + per_page: + revs_walker = archive.get_revisions_walker( + revs_ordering, + revision_id, + max_revs=offset + per_page + 1, + state=revs_walker_state, + ) + rev_log += [rev["id"] for rev in revs_walker] + revs_walker_state = revs_walker.export_state() + + revs = rev_log[offset : offset + per_page] + revision_log = archive.lookup_revision_multiple(revs) - except Exception as exc: - return handle_view_exception(request, exc) + request.session[session_key] = { + "rev_log": rev_log, + "revs_walker_state": revs_walker_state, + } origin_info = snapshot_context["origin_info"] visit_info = snapshot_context["visit_info"] url_args = snapshot_context["url_args"] query_params = snapshot_context["query_params"] snapshot_id = snapshot_context["snapshot_id"] query_params["per_page"] = per_page revs_ordering = 
request.GET.get("revs_ordering", "") query_params["revs_ordering"] = revs_ordering or None if origin_info: browse_view_name = "browse-origin-log" else: browse_view_name = "browse-snapshot-log" prev_log_url = None if len(rev_log) > offset + per_page: query_params["offset"] = offset + per_page prev_log_url = reverse( browse_view_name, url_args=url_args, query_params=query_params ) next_log_url = None if offset != 0: query_params["offset"] = offset - per_page next_log_url = reverse( browse_view_name, url_args=url_args, query_params=query_params ) revision_log_data = format_log_entries(revision_log, per_page, snapshot_context) browse_rev_link = gen_revision_link(revision_id) browse_log_link = gen_revision_log_link(revision_id) browse_snp_link = gen_snapshot_link(snapshot_id) revision_metadata = { "context-independent revision": browse_rev_link, "context-independent revision history": browse_log_link, "context-independent snapshot": browse_snp_link, "snapshot": snapshot_id, } if origin_info: revision_metadata["origin url"] = origin_info["url"] revision_metadata["origin visit date"] = format_utc_iso_date(visit_info["date"]) revision_metadata["origin visit type"] = visit_info["type"] swh_objects = [ SWHObjectInfo(object_type=REVISION, object_id=revision_id), SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id), ] release_id = snapshot_context["release_id"] if release_id: swh_objects.append(SWHObjectInfo(object_type=RELEASE, object_id=release_id)) browse_rel_link = gen_release_link(release_id) revision_metadata["release"] = release_id revision_metadata["context-independent release"] = browse_rel_link swhids_info = get_swhids_info(swh_objects, snapshot_context) context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: context_found = "origin: %s" % origin_info["url"] heading = "Revision history - %s - %s" % (snapshot_context["branch"], context_found) return render( request, "browse/revision-log.html", { "heading": heading, "swh_object_name": 
"Revisions history", "swh_object_metadata": revision_metadata, "revision_log": revision_log_data, "revs_ordering": revs_ordering, "next_log_url": next_log_url, "prev_log_url": prev_log_url, "breadcrumbs": None, "top_right_link": None, "snapshot_context": snapshot_context, "vault_cooking": None, "show_actions": True, "swhids_info": swhids_info, }, ) def browse_snapshot_branches( request, snapshot_id=None, origin_url=None, timestamp=None ): """ Django view implementation for browsing a list of branches in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) + _check_origin_url(snapshot_id, origin_url) - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - ) - - branches_bc = request.GET.get("branches_breadcrumbs", "") - branches_bc = branches_bc.split(",") if branches_bc else [] - branches_from = branches_bc[-1] if branches_bc else "" + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + ) - origin_info = snapshot_context["origin_info"] - url_args = snapshot_context["url_args"] - query_params = snapshot_context["query_params"] + branches_bc = request.GET.get("branches_breadcrumbs", "") + branches_bc = branches_bc.split(",") if branches_bc else [] + branches_from = branches_bc[-1] if branches_bc else "" - if origin_info: - browse_view_name = "browse-origin-directory" - else: - browse_view_name = "browse-snapshot-directory" + origin_info = snapshot_context["origin_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] - snapshot = archive.lookup_snapshot( - snapshot_context["snapshot_id"], - branches_from, - PER_PAGE + 1, - target_types=["revision", "alias"], - ) + if origin_info: + browse_view_name = "browse-origin-directory" + else: + browse_view_name = "browse-snapshot-directory" - 
displayed_branches, _ = process_snapshot_branches(snapshot) + snapshot = archive.lookup_snapshot( + snapshot_context["snapshot_id"], + branches_from, + PER_PAGE + 1, + target_types=["revision", "alias"], + ) - except Exception as exc: - return handle_view_exception(request, exc) + displayed_branches, _ = process_snapshot_branches(snapshot) for branch in displayed_branches: rev_query_params = {} if origin_info: rev_query_params["origin_url"] = origin_info["url"] revision_url = reverse( "browse-revision", url_args={"sha1_git": branch["revision"]}, query_params=query_params, ) query_params["branch"] = branch["name"] directory_url = reverse( browse_view_name, url_args=url_args, query_params=query_params ) del query_params["branch"] branch["revision_url"] = revision_url branch["directory_url"] = directory_url if origin_info: browse_view_name = "browse-origin-branches" else: browse_view_name = "browse-snapshot-branches" prev_branches_url = None next_branches_url = None if branches_bc: query_params_prev = dict(query_params) query_params_prev["branches_breadcrumbs"] = ",".join(branches_bc[:-1]) prev_branches_url = reverse( browse_view_name, url_args=url_args, query_params=query_params_prev ) elif branches_from: prev_branches_url = reverse( browse_view_name, url_args=url_args, query_params=query_params ) if snapshot["next_branch"] is not None: query_params_next = dict(query_params) next_branch = displayed_branches[-1]["name"] del displayed_branches[-1] branches_bc.append(next_branch) query_params_next["branches_breadcrumbs"] = ",".join(branches_bc) next_branches_url = reverse( browse_view_name, url_args=url_args, query_params=query_params_next ) heading = "Branches - " if origin_info: heading += "origin: %s" % origin_info["url"] else: heading += "snapshot: %s" % snapshot_id return render( request, "browse/branches.html", { "heading": heading, "swh_object_name": "Branches", "swh_object_metadata": {}, "top_right_link": None, "displayed_branches": displayed_branches, 
"prev_branches_url": prev_branches_url, "next_branches_url": next_branches_url, "snapshot_context": snapshot_context, }, ) def browse_snapshot_releases( request, snapshot_id=None, origin_url=None, timestamp=None ): """ Django view implementation for browsing a list of releases in a snapshot context. """ - try: - - _check_origin_url(snapshot_id, origin_url) - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - ) + _check_origin_url(snapshot_id, origin_url) - rel_bc = request.GET.get("releases_breadcrumbs", "") - rel_bc = rel_bc.split(",") if rel_bc else [] - rel_from = rel_bc[-1] if rel_bc else "" + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=request.GET.get("visit_id"), + ) - origin_info = snapshot_context["origin_info"] - url_args = snapshot_context["url_args"] - query_params = snapshot_context["query_params"] + rel_bc = request.GET.get("releases_breadcrumbs", "") + rel_bc = rel_bc.split(",") if rel_bc else [] + rel_from = rel_bc[-1] if rel_bc else "" - snapshot = archive.lookup_snapshot( - snapshot_context["snapshot_id"], - rel_from, - PER_PAGE + 1, - target_types=["release", "alias"], - ) + origin_info = snapshot_context["origin_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] - _, displayed_releases = process_snapshot_branches(snapshot) + snapshot = archive.lookup_snapshot( + snapshot_context["snapshot_id"], + rel_from, + PER_PAGE + 1, + target_types=["release", "alias"], + ) - except Exception as exc: - return handle_view_exception(request, exc) + _, displayed_releases = process_snapshot_branches(snapshot) for release in displayed_releases: query_params_tgt = {"snapshot": snapshot_id} if origin_info: query_params_tgt["origin_url"] = origin_info["url"] release_url = reverse( "browse-release", url_args={"sha1_git": 
release["id"]}, query_params=query_params_tgt, ) target_url = "" if release["target_type"] == "revision": target_url = reverse( "browse-revision", url_args={"sha1_git": release["target"]}, query_params=query_params_tgt, ) elif release["target_type"] == "directory": target_url = reverse( "browse-directory", url_args={"sha1_git": release["target"]}, query_params=query_params_tgt, ) elif release["target_type"] == "content": target_url = reverse( "browse-content", url_args={"query_string": release["target"]}, query_params=query_params_tgt, ) elif release["target_type"] == "release": target_url = reverse( "browse-release", url_args={"sha1_git": release["target"]}, query_params=query_params_tgt, ) release["release_url"] = release_url release["target_url"] = target_url if origin_info: browse_view_name = "browse-origin-releases" else: browse_view_name = "browse-snapshot-releases" prev_releases_url = None next_releases_url = None if rel_bc: query_params_prev = dict(query_params) query_params_prev["releases_breadcrumbs"] = ",".join(rel_bc[:-1]) prev_releases_url = reverse( browse_view_name, url_args=url_args, query_params=query_params_prev ) elif rel_from: prev_releases_url = reverse( browse_view_name, url_args=url_args, query_params=query_params ) if snapshot["next_branch"] is not None: query_params_next = dict(query_params) next_rel = displayed_releases[-1]["branch_name"] del displayed_releases[-1] rel_bc.append(next_rel) query_params_next["releases_breadcrumbs"] = ",".join(rel_bc) next_releases_url = reverse( browse_view_name, url_args=url_args, query_params=query_params_next ) heading = "Releases - " if origin_info: heading += "origin: %s" % origin_info["url"] else: heading += "snapshot: %s" % snapshot_id return render( request, "browse/releases.html", { "heading": heading, "top_panel_visible": False, "top_panel_collapsible": False, "swh_object_name": "Releases", "swh_object_metadata": {}, "top_right_link": None, "displayed_releases": displayed_releases, 
"prev_releases_url": prev_releases_url, "next_releases_url": next_releases_url, "snapshot_context": snapshot_context, "vault_cooking": None, "show_actions": False, }, ) diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py index eec4958d..244aba20 100644 --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -1,407 +1,397 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import difflib from distutils.util import strtobool import sentry_sdk from django.http import HttpResponse, JsonResponse from django.shortcuts import render from django.template.defaultfilters import filesizeformat from swh.model.hashutil import hash_to_hex from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( content_display_max_size, gen_directory_link, gen_link, prepare_content_for_display, request_content, ) from swh.web.common import archive, highlightjs, query -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import ContentMetadata, SWHObjectInfo from swh.web.common.utils import gen_path_info, reverse, swh_object_icons @browse_route( r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/raw/", view_name="browse-content-raw", checksum_args=["query_string"], ) def content_raw(request, query_string): """Django view that produces a raw display of a content identified by its hash value.
The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/raw/` """ - try: - re_encode = bool(strtobool(request.GET.get("re_encode", "false"))) - algo, checksum = query.parse_hash(query_string) - checksum = hash_to_hex(checksum) - content_data = request_content(query_string, max_size=None, re_encode=re_encode) - except Exception as exc: - return handle_view_exception(request, exc) + re_encode = bool(strtobool(request.GET.get("re_encode", "false"))) + algo, checksum = query.parse_hash(query_string) + checksum = hash_to_hex(checksum) + content_data = request_content(query_string, max_size=None, re_encode=re_encode) filename = request.GET.get("filename", None) if not filename: filename = "%s_%s" % (algo, checksum) if ( content_data["mimetype"].startswith("text/") or content_data["mimetype"] == "inode/x-empty" ): response = HttpResponse(content_data["raw_data"], content_type="text/plain") response["Content-disposition"] = "filename=%s" % filename else: response = HttpResponse( content_data["raw_data"], content_type="application/octet-stream" ) response["Content-disposition"] = "attachment; filename=%s" % filename return response _auto_diff_size_limit = 20000 @browse_route( r"content/(?P<from_query_string>.*)/diff/(?P<to_query_string>.*)", view_name="diff-contents", ) def _contents_diff(request, from_query_string, to_query_string): """ Browse endpoint used to compute unified diffs between two contents. Diffs are generated only if the two contents are textual. By default, diffs whose size are greater than 20 kB will not be generated. To force the generation of large diffs, the 'force' boolean query parameter must be used.
Args: request: input django http request from_query_string: a string of the form "[ALGO_HASH:]HASH" where optional ALGO_HASH can be either ``sha1``, ``sha1_git``, ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH the hexadecimal representation of the hash value identifying the first content to_query_string: same as above for identifying the second content Returns: A JSON object containing the unified diff. """ diff_data = {} content_from = None content_to = None content_from_size = 0 content_to_size = 0 content_from_lines = [] content_to_lines = [] force = request.GET.get("force", "false") path = request.GET.get("path", None) language = "nohighlight" force = bool(strtobool(force)) if from_query_string == to_query_string: diff_str = "File renamed without changes" else: try: text_diff = True if from_query_string: content_from = request_content(from_query_string, max_size=None) content_from_display_data = prepare_content_for_display( content_from["raw_data"], content_from["mimetype"], path ) language = content_from_display_data["language"] content_from_size = content_from["length"] if not ( content_from["mimetype"].startswith("text/") or content_from["mimetype"] == "inode/x-empty" ): text_diff = False if text_diff and to_query_string: content_to = request_content(to_query_string, max_size=None) content_to_display_data = prepare_content_for_display( content_to["raw_data"], content_to["mimetype"], path ) language = content_to_display_data["language"] content_to_size = content_to["length"] if not ( content_to["mimetype"].startswith("text/") or content_to["mimetype"] == "inode/x-empty" ): text_diff = False diff_size = abs(content_to_size - content_from_size) if not text_diff: diff_str = "Diffs are not generated for non textual content" language = "nohighlight" elif not force and diff_size > _auto_diff_size_limit: diff_str = "Large diffs are not automatically computed" language = "nohighlight" else: if content_from: content_from_lines = ( 
content_from["raw_data"].decode("utf-8").splitlines(True) ) if content_from_lines and content_from_lines[-1][-1] != "\n": content_from_lines[-1] += "[swh-no-nl-marker]\n" if content_to: content_to_lines = ( content_to["raw_data"].decode("utf-8").splitlines(True) ) if content_to_lines and content_to_lines[-1][-1] != "\n": content_to_lines[-1] += "[swh-no-nl-marker]\n" diff_lines = difflib.unified_diff(content_from_lines, content_to_lines) diff_str = "".join(list(diff_lines)[2:]) except Exception as exc: sentry_sdk.capture_exception(exc) diff_str = str(exc) diff_data["diff_str"] = diff_str diff_data["language"] = language return JsonResponse(diff_data) @browse_route( r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/", view_name="browse-content", checksum_args=["query_string"], ) def content_display(request, query_string): """Django view that produces an HTML display of a content identified by its hash value. The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/` """ - try: - algo, checksum = query.parse_hash(query_string) - checksum = hash_to_hex(checksum) - content_data = request_content(query_string, raise_if_unavailable=False) - origin_url = request.GET.get("origin_url") - selected_language = request.GET.get("language") - if not origin_url: - origin_url = request.GET.get("origin") - snapshot_id = request.GET.get("snapshot") - path = request.GET.get("path") - snapshot_context = None - if origin_url is not None or snapshot_id is not None: - try: - snapshot_context = get_snapshot_context( - origin_url=origin_url, - snapshot_id=snapshot_id, - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - path=path, - browse_context=CONTENT, + algo, checksum = query.parse_hash(query_string) + checksum = hash_to_hex(checksum) + content_data = request_content(query_string, raise_if_unavailable=False) + origin_url = request.GET.get("origin_url") + selected_language = request.GET.get("language") +
if not origin_url: + origin_url = request.GET.get("origin") + snapshot_id = request.GET.get("snapshot") + path = request.GET.get("path") + snapshot_context = None + if origin_url is not None or snapshot_id is not None: + try: + snapshot_context = get_snapshot_context( + origin_url=origin_url, + snapshot_id=snapshot_id, + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + path=path, + browse_context=CONTENT, + ) + except NotFoundExc as e: + if str(e).startswith("Origin"): + raw_cnt_url = reverse( + "browse-content", url_args={"query_string": query_string} ) - except NotFoundExc as e: - if str(e).startswith("Origin"): - raw_cnt_url = reverse( - "browse-content", url_args={"query_string": query_string} - ) - error_message = ( - "The Software Heritage archive has a content " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. " - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the content " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_cnt_url)) - ) - raise NotFoundExc(error_message) - else: - raise e - except Exception as exc: - return handle_view_exception(request, exc) - + error_message = ( + "The Software Heritage archive has a content " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. 
" + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the content " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_cnt_url)) + ) + raise NotFoundExc(error_message) + else: + raise e content = None language = None mimetype = None if content_data["raw_data"] is not None: content_display_data = prepare_content_for_display( content_data["raw_data"], content_data["mimetype"], path ) content = content_display_data["content_data"] language = content_display_data["language"] mimetype = content_display_data["mimetype"] # Override language with user-selected language if selected_language is not None: language = selected_language available_languages = None if mimetype and "text/" in mimetype: available_languages = highlightjs.get_supported_languages() filename = None path_info = None directory_id = None directory_url = None root_dir = None if snapshot_context: root_dir = snapshot_context.get("root_directory") query_params = snapshot_context["query_params"] if snapshot_context else {} breadcrumbs = [] if path: split_path = path.split("/") root_dir = root_dir or split_path[0] filename = split_path[-1] if root_dir != path: path = path.replace(root_dir + "/", "") path = path[: -len(filename)] path_info = gen_path_info(path) query_params.pop("path", None) dir_url = reverse( "browse-directory", url_args={"sha1_git": root_dir}, query_params=query_params, ) breadcrumbs.append({"name": root_dir[:7], "url": dir_url}) for pi in path_info: query_params["path"] = pi["path"] dir_url = reverse( "browse-directory", url_args={"sha1_git": root_dir}, query_params=query_params, ) breadcrumbs.append({"name": pi["name"], "url": dir_url}) breadcrumbs.append({"name": filename, "url": None}) if path and root_dir != path: - try: - dir_info = archive.lookup_directory_with_path(root_dir, path) - directory_id = dir_info["target"] - except Exception as exc: - return handle_view_exception(request, exc) + dir_info = 
archive.lookup_directory_with_path(root_dir, path) + directory_id = dir_info["target"] elif root_dir != path: directory_id = root_dir else: root_dir = None if directory_id: directory_url = gen_directory_link(directory_id) query_params = {"filename": filename} content_checksums = content_data["checksums"] content_url = reverse( "browse-content", url_args={"query_string": f'sha1_git:{content_checksums["sha1_git"]}'}, ) content_raw_url = reverse( "browse-content-raw", url_args={"query_string": query_string}, query_params=query_params, ) content_metadata = ContentMetadata( object_type=CONTENT, object_id=content_checksums["sha1_git"], sha1=content_checksums["sha1"], sha1_git=content_checksums["sha1_git"], sha256=content_checksums["sha256"], blake2s256=content_checksums["blake2s256"], content_url=content_url, mimetype=content_data["mimetype"], encoding=content_data["encoding"], size=filesizeformat(content_data["length"]), language=content_data["language"], root_directory=root_dir, path=f"/{path}" if path else None, filename=filename or "", directory=directory_id, directory_url=directory_url, revision=None, release=None, snapshot=None, origin_url=origin_url, ) swh_objects = [ SWHObjectInfo(object_type=CONTENT, object_id=content_checksums["sha1_git"]) ] if directory_id: swh_objects.append(SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id)) if snapshot_context: swh_objects.append( SWHObjectInfo( object_type=REVISION, object_id=snapshot_context["revision_id"] ) ) swh_objects.append( SWHObjectInfo( object_type=SNAPSHOT, object_id=snapshot_context["snapshot_id"] ) ) if snapshot_context["release_id"]: swh_objects.append( SWHObjectInfo( object_type=RELEASE, object_id=snapshot_context["release_id"] ) ) swhids_info = get_swhids_info( swh_objects, snapshot_context, extra_context=content_metadata, ) heading = "Content - %s" % content_checksums["sha1_git"] if breadcrumbs: content_path = "/".join([bc["name"] for bc in breadcrumbs]) heading += " - %s" % content_path return 
render( request, "browse/content.html", { "heading": heading, "swh_object_id": swhids_info[0]["swhid"], "swh_object_name": "Content", "swh_object_metadata": content_metadata, "content": content, "content_size": content_data["length"], "max_content_size": content_display_max_size, "filename": filename, "encoding": content_data["encoding"], "mimetype": mimetype, "language": language, "available_languages": available_languages, "breadcrumbs": breadcrumbs, "top_right_link": { "url": content_raw_url, "icon": swh_object_icons["content"], "text": "Raw File", }, "snapshot_context": snapshot_context, "vault_cooking": None, "show_actions": True, "swhids_info": swhids_info, "error_code": content_data["error_code"], "error_message": content_data["error_message"], "error_description": content_data["error_description"], }, status=content_data["error_code"], ) diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py index 333f39ae..aa8d579c 100644 --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -1,273 +1,270 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os import sentry_sdk from django.http import HttpResponse from django.shortcuts import redirect, render from django.template.defaultfilters import filesizeformat from swh.model.identifiers import DIRECTORY, RELEASE, REVISION, SNAPSHOT from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import gen_link, get_directory_entries, get_readme_to_display from swh.web.common import archive -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from 
swh.web.common.typing import DirectoryMetadata, SWHObjectInfo from swh.web.common.utils import gen_path_info, reverse, swh_object_icons def _directory_browse(request, sha1_git, path=None): root_sha1_git = sha1_git - try: - if path: - dir_info = archive.lookup_directory_with_path(sha1_git, path) - sha1_git = dir_info["target"] - - dirs, files = get_directory_entries(sha1_git) - origin_url = request.GET.get("origin_url") - if not origin_url: - origin_url = request.GET.get("origin") - snapshot_id = request.GET.get("snapshot") - snapshot_context = None - if origin_url is not None or snapshot_id is not None: - try: - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - path=path, + if path: + dir_info = archive.lookup_directory_with_path(sha1_git, path) + sha1_git = dir_info["target"] + + dirs, files = get_directory_entries(sha1_git) + origin_url = request.GET.get("origin_url") + if not origin_url: + origin_url = request.GET.get("origin") + snapshot_id = request.GET.get("snapshot") + snapshot_context = None + if origin_url is not None or snapshot_id is not None: + try: + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=request.GET.get("revision"), + path=path, + ) + except NotFoundExc as e: + if str(e).startswith("Origin"): + raw_dir_url = reverse( + "browse-directory", url_args={"sha1_git": sha1_git} ) - except NotFoundExc as e: - if str(e).startswith("Origin"): - raw_dir_url = reverse( - "browse-directory", url_args={"sha1_git": sha1_git} - ) - error_message = ( - "The Software Heritage archive has a directory " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. 
" - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the directory " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_dir_url)) - ) - raise NotFoundExc(error_message) - else: - raise e - except Exception as exc: - return handle_view_exception(request, exc) + error_message = ( + "The Software Heritage archive has a directory " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. " + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the directory " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_dir_url)) + ) + raise NotFoundExc(error_message) + else: + raise e path_info = gen_path_info(path) query_params = snapshot_context["query_params"] if snapshot_context else {} breadcrumbs = [] breadcrumbs.append( { "name": root_sha1_git[:7], "url": reverse( "browse-directory", url_args={"sha1_git": root_sha1_git}, query_params={**query_params, "path": None}, ), } ) for pi in path_info: breadcrumbs.append( { "name": pi["name"], "url": reverse( "browse-directory", url_args={"sha1_git": root_sha1_git}, query_params={**query_params, "path": pi["path"],}, ), } ) path = "" if path is None else (path + "/") for d in dirs: if d["type"] == "rev": d["url"] = reverse( "browse-revision", url_args={"sha1_git": d["target"]}, query_params=query_params, ) else: d["url"] = reverse( "browse-directory", url_args={"sha1_git": root_sha1_git}, query_params={**query_params, "path": path + d["name"],}, ) sum_file_sizes = 0 readmes = {} for f in files: query_string = "sha1_git:" + f["target"] f["url"] = reverse( "browse-content", url_args={"query_string": query_string}, query_params={ **query_params, "path": root_sha1_git + "/" + path + f["name"], }, ) if f["length"] is not None: sum_file_sizes += f["length"] f["length"] = filesizeformat(f["length"]) if f["name"].lower().startswith("readme"): readmes[f["name"]] = f["checksums"]["sha1"] 
readme_name, readme_url, readme_html = get_readme_to_display(readmes) sum_file_sizes = filesizeformat(sum_file_sizes) dir_metadata = DirectoryMetadata( object_type=DIRECTORY, object_id=sha1_git, directory=root_sha1_git, nb_files=len(files), nb_dirs=len(dirs), sum_file_sizes=sum_file_sizes, root_directory=root_sha1_git, path=f"/{path}" if path else None, revision=None, revision_found=None, release=None, snapshot=None, ) vault_cooking = { "directory_context": True, "directory_id": sha1_git, "revision_context": False, "revision_id": None, } swh_objects = [SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git)] if snapshot_context: swh_objects.append( SWHObjectInfo( object_type=REVISION, object_id=snapshot_context["revision_id"] ) ) swh_objects.append( SWHObjectInfo( object_type=SNAPSHOT, object_id=snapshot_context["snapshot_id"] ) ) if snapshot_context["release_id"]: swh_objects.append( SWHObjectInfo( object_type=RELEASE, object_id=snapshot_context["release_id"] ) ) swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata) heading = "Directory - %s" % sha1_git if breadcrumbs: dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/" heading += " - %s" % dir_path top_right_link = None if snapshot_context is not None and not snapshot_context["is_empty"]: history_url = reverse( "browse-revision-log", url_args={"sha1_git": snapshot_context["revision_id"]}, query_params=query_params, ) top_right_link = { "url": history_url, "icon": swh_object_icons["revisions history"], "text": "History", } return render( request, "browse/directory.html", { "heading": heading, "swh_object_id": swhids_info[0]["swhid"], "swh_object_name": "Directory", "swh_object_metadata": dir_metadata, "dirs": dirs, "files": files, "breadcrumbs": breadcrumbs, "top_right_link": top_right_link, "readme_name": readme_name, "readme_url": readme_url, "readme_html": readme_html, "snapshot_context": snapshot_context, "vault_cooking": vault_cooking, "show_actions": True, "swhids_info": 
swhids_info, }, ) @browse_route( r"directory/(?P<sha1_git>[0-9a-f]+)/", view_name="browse-directory", checksum_args=["sha1_git"], ) def directory_browse(request, sha1_git): """Django view for browsing the content of a directory identified by its sha1_git value. The url that points to it is :http:get:`/browse/directory/(sha1_git)/` """ return _directory_browse(request, sha1_git, request.GET.get("path")) @browse_route( r"directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/", view_name="browse-directory-legacy", checksum_args=["sha1_git"], ) def directory_browse_legacy(request, sha1_git, path): """Django view for browsing the content of a directory identified by its sha1_git value. The url that points to it is :http:get:`/browse/directory/(sha1_git)/(path)/` """ return _directory_browse(request, sha1_git, path) @browse_route( r"directory/resolve/content-path/(?P<sha1_git>[0-9a-f]+)/", view_name="browse-directory-resolve-content-path", checksum_args=["sha1_git"], ) def _directory_resolve_content_path(request, sha1_git): """ Internal endpoint redirecting to data url for a specific file path relative to a root directory. 
""" try: path = os.path.normpath(request.GET.get("path")) if not path.startswith("../"): dir_info = archive.lookup_directory_with_path(sha1_git, path) if dir_info["type"] == "file": sha1 = dir_info["checksums"]["sha1"] data_url = reverse( "browse-content-raw", url_args={"query_string": sha1} ) return redirect(data_url) except Exception as exc: sentry_sdk.capture_exception(exc) return HttpResponse(status=404) diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py index a8cac672..8742c0ed 100644 --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -1,319 +1,315 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import redirect, render from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import ( browse_snapshot_branches, browse_snapshot_content, browse_snapshot_directory, browse_snapshot_log, browse_snapshot_releases, get_snapshot_context, ) from swh.web.common import archive -from swh.web.common.exc import BadInputExc, handle_view_exception +from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import format_utc_iso_date, parse_iso8601_date_to_utc, reverse @browse_route( r"origin/directory/", view_name="browse-origin-directory", ) def origin_directory_browse(request): """Django view for browsing the content of a directory associated to an origin for a given visit. 
The URL that points to it is :http:get:`/browse/origin/directory/` """ return browse_snapshot_directory( request, origin_url=request.GET.get("origin_url"), snapshot_id=request.GET.get("snapshot"), timestamp=request.GET.get("timestamp"), path=request.GET.get("path"), ) @browse_route( r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/directory/", r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/directory/(?P<path>.+)/", r"origin/(?P<origin_url>.+)/directory/(?P<path>.+)/", r"origin/(?P<origin_url>.+)/directory/", view_name="browse-origin-directory-legacy", ) def origin_directory_browse_legacy(request, origin_url, timestamp=None, path=None): """Django view for browsing the content of a directory associated to an origin for a given visit. The URLs that point to it are :http:get:`/browse/origin/(origin_url)/directory/[(path)/]` and :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/directory/[(path)/]` """ return browse_snapshot_directory( request, origin_url=origin_url, snapshot_id=request.GET.get("snapshot"), timestamp=timestamp, path=path, ) @browse_route( r"origin/content/", view_name="browse-origin-content", ) def origin_content_browse(request): """Django view that produces an HTML display of a content associated to an origin for a given visit. The URL that points to it is :http:get:`/browse/origin/content/` """ return browse_snapshot_content( request, origin_url=request.GET.get("origin_url"), snapshot_id=request.GET.get("snapshot"), timestamp=request.GET.get("timestamp"), path=request.GET.get("path"), selected_language=request.GET.get("language"), ) @browse_route( r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/content/(?P<path>.+)/", r"origin/(?P<origin_url>.+)/content/(?P<path>.+)/", r"origin/(?P<origin_url>.+)/content/", view_name="browse-origin-content-legacy", ) def origin_content_browse_legacy(request, origin_url, path=None, timestamp=None): """Django view that produces an HTML display of a content associated to an origin for a given visit. 
The URLs that point to it are :http:get:`/browse/origin/(origin_url)/content/(path)/` and :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/content/(path)/` """ return browse_snapshot_content( request, origin_url=origin_url, snapshot_id=request.GET.get("snapshot"), timestamp=timestamp, path=path, selected_language=request.GET.get("language"), ) @browse_route( r"origin/log/", view_name="browse-origin-log", ) def origin_log_browse(request): """Django view that produces an HTML display of revisions history (aka the commit log) associated to a software origin. The URL that points to it is :http:get:`/browse/origin/log/` """ return browse_snapshot_log( request, origin_url=request.GET.get("origin_url"), snapshot_id=request.GET.get("snapshot"), timestamp=request.GET.get("timestamp"), ) @browse_route( r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/log/", r"origin/(?P<origin_url>.+)/log/", view_name="browse-origin-log-legacy", ) def origin_log_browse_legacy(request, origin_url, timestamp=None): """Django view that produces an HTML display of revisions history (aka the commit log) associated to a software origin. The URLs that point to it are :http:get:`/browse/origin/(origin_url)/log/` and :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/log/` """ return browse_snapshot_log( request, origin_url=origin_url, snapshot_id=request.GET.get("snapshot"), timestamp=timestamp, ) @browse_route( r"origin/branches/", view_name="browse-origin-branches", ) def origin_branches_browse(request): """Django view that produces an HTML display of the list of branches associated to an origin for a given visit. 
The URL that points to it is :http:get:`/browse/origin/branches/` """ return browse_snapshot_branches( request, origin_url=request.GET.get("origin_url"), snapshot_id=request.GET.get("snapshot"), timestamp=request.GET.get("timestamp"), ) @browse_route( r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/branches/", r"origin/(?P<origin_url>.+)/branches/", view_name="browse-origin-branches-legacy", ) def origin_branches_browse_legacy(request, origin_url, timestamp=None): """Django view that produces an HTML display of the list of branches associated to an origin for a given visit. The URLs that point to it are :http:get:`/browse/origin/(origin_url)/branches/` and :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/branches/` """ return browse_snapshot_branches( request, origin_url=origin_url, snapshot_id=request.GET.get("snapshot"), timestamp=timestamp, ) @browse_route( r"origin/releases/", view_name="browse-origin-releases", ) def origin_releases_browse(request): """Django view that produces an HTML display of the list of releases associated to an origin for a given visit. The URL that points to it is :http:get:`/browse/origin/releases/` """ return browse_snapshot_releases( request, origin_url=request.GET.get("origin_url"), snapshot_id=request.GET.get("snapshot"), timestamp=request.GET.get("timestamp"), ) @browse_route( r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/releases/", r"origin/(?P<origin_url>.+)/releases/", view_name="browse-origin-releases-legacy", ) def origin_releases_browse_legacy(request, origin_url, timestamp=None): """Django view that produces an HTML display of the list of releases associated to an origin for a given visit. 
The URLs that point to it are :http:get:`/browse/origin/(origin_url)/releases/` and :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/releases/` """ return browse_snapshot_releases( request, origin_url=origin_url, snapshot_id=request.GET.get("snapshot"), timestamp=timestamp, ) def _origin_visits_browse(request, origin_url): - try: + if origin_url is None: + raise BadInputExc("An origin URL must be provided as query parameter.") - if origin_url is None: - raise BadInputExc("An origin URL must be provided as query parameter.") - - origin_info = archive.lookup_origin({"url": origin_url}) - origin_visits = get_origin_visits(origin_info) - snapshot_context = get_snapshot_context(origin_url=origin_url) - except Exception as exc: - return handle_view_exception(request, exc) + origin_info = archive.lookup_origin({"url": origin_url}) + origin_visits = get_origin_visits(origin_info) + snapshot_context = get_snapshot_context(origin_url=origin_url) for i, visit in enumerate(origin_visits): url_date = format_utc_iso_date(visit["date"], "%Y-%m-%dT%H:%M:%SZ") visit["formatted_date"] = format_utc_iso_date(visit["date"]) query_params = {"origin_url": origin_url, "timestamp": url_date} if i < len(origin_visits) - 1: if visit["date"] == origin_visits[i + 1]["date"]: query_params = {"visit_id": visit["visit"]} if i > 0: if visit["date"] == origin_visits[i - 1]["date"]: query_params = {"visit_id": visit["visit"]} snapshot = visit["snapshot"] if visit["snapshot"] else "" visit["url"] = reverse("browse-origin-directory", query_params=query_params,) if not snapshot: visit["snapshot"] = "" visit["date"] = parse_iso8601_date_to_utc(visit["date"]).timestamp() heading = "Origin visits - %s" % origin_url return render( request, "browse/origin-visits.html", { "heading": heading, "swh_object_name": "Visits", "swh_object_metadata": origin_info, "origin_visits": origin_visits, "origin_info": origin_info, "snapshot_context": snapshot_context, "vault_cooking": None, "show_actions": False, }, 
) @browse_route(r"origin/visits/", view_name="browse-origin-visits") def origin_visits_browse(request): """Django view that produces an HTML display of visits reporting for a given origin. The URL that points to it is :http:get:`/browse/origin/visits/`. """ return _origin_visits_browse(request, request.GET.get("origin_url")) @browse_route( r"origin/(?P<origin_url>.+)/visits/", view_name="browse-origin-visits-legacy" ) def origin_visits_browse_legacy(request, origin_url): """Django view that produces an HTML display of visits reporting for a given origin. The URL that points to it is :http:get:`/browse/origin/(origin_url)/visits/`. """ return _origin_visits_browse(request, origin_url) @browse_route(r"origin/", view_name="browse-origin") def origin_browse(request): """Django view that redirects to the display of the latest archived snapshot for a given software origin. """ last_snapshot_url = reverse("browse-origin-directory", query_params=request.GET,) return redirect(last_snapshot_url) @browse_route(r"origin/(?P<origin_url>.+)/", view_name="browse-origin-legacy") def origin_browse_legacy(request, origin_url): """Django view that redirects to the display of the latest archived snapshot for a given software origin. 
""" last_snapshot_url = reverse( "browse-origin-directory", query_params={"origin_url": origin_url, **request.GET}, ) return redirect(last_snapshot_url) diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py index 4f7a5b9e..15eb8e76 100644 --- a/swh/web/browse/views/release.py +++ b/swh/web/browse/views/release.py @@ -1,242 +1,239 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import sentry_sdk from django.shortcuts import render from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( gen_content_link, gen_directory_link, gen_link, gen_person_mail_link, gen_release_link, gen_revision_link, gen_snapshot_link, ) from swh.web.common import archive -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import ReleaseMetadata, SWHObjectInfo from swh.web.common.utils import format_utc_iso_date, reverse @browse_route( r"release/(?P<sha1_git>[0-9a-f]+)/", view_name="browse-release", checksum_args=["sha1_git"], ) def release_browse(request, sha1_git): """ Django view that produces an HTML display of a release identified by its id. The url that points to it is :http:get:`/browse/release/(sha1_git)/`. 
""" - try: - release = archive.lookup_release(sha1_git) - snapshot_context = {} - origin_info = None - snapshot_id = request.GET.get("snapshot_id") - if not snapshot_id: - snapshot_id = request.GET.get("snapshot") - origin_url = request.GET.get("origin_url") - if not origin_url: - origin_url = request.GET.get("origin") - timestamp = request.GET.get("timestamp") - visit_id = request.GET.get("visit_id") - if origin_url: - try: - snapshot_context = get_snapshot_context( - snapshot_id, origin_url, timestamp, visit_id - ) - except NotFoundExc as e: - raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git}) - error_message = ( - "The Software Heritage archive has a release " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. " - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the release " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_rel_url)) - ) - if str(e).startswith("Origin"): - raise NotFoundExc(error_message) - else: - raise e - origin_info = snapshot_context["origin_info"] - elif snapshot_id: - snapshot_context = get_snapshot_context(snapshot_id) - except Exception as exc: - return handle_view_exception(request, exc) + release = archive.lookup_release(sha1_git) + snapshot_context = {} + origin_info = None + snapshot_id = request.GET.get("snapshot_id") + if not snapshot_id: + snapshot_id = request.GET.get("snapshot") + origin_url = request.GET.get("origin_url") + if not origin_url: + origin_url = request.GET.get("origin") + timestamp = request.GET.get("timestamp") + visit_id = request.GET.get("visit_id") + if origin_url: + try: + snapshot_context = get_snapshot_context( + snapshot_id, origin_url, timestamp, visit_id + ) + except NotFoundExc as e: + raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git}) + error_message = ( + "The Software Heritage archive has a release " + "with the hash you provided but the origin " + 
"mentioned in your request appears broken: %s. " + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the release " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_rel_url)) + ) + if str(e).startswith("Origin"): + raise NotFoundExc(error_message) + else: + raise e + origin_info = snapshot_context["origin_info"] + elif snapshot_id: + snapshot_context = get_snapshot_context(snapshot_id) target_url = None if release["target_type"] == REVISION: target_url = gen_revision_link(release["target"]) elif release["target_type"] == CONTENT: target_url = gen_content_link(release["target"]) elif release["target_type"] == DIRECTORY: target_url = gen_directory_link(release["target"]) elif release["target_type"] == RELEASE: target_url = gen_release_link(release["target"]) snapshot_id = None browse_snp_link = None if snapshot_context: snapshot_id = snapshot_context["snapshot_id"] browse_snp_link = gen_snapshot_link(snapshot_id) release_metadata = ReleaseMetadata( object_type=RELEASE, object_id=sha1_git, release=sha1_git, release_url=gen_release_link(release["id"]), author=release["author"]["fullname"] if release["author"] else "None", author_url=gen_person_mail_link(release["author"]) if release["author"] else "None", date=format_utc_iso_date(release["date"]), name=release["name"], synthetic=release["synthetic"], target=release["target"], target_type=release["target_type"], target_url=target_url, snapshot=snapshot_context.get("snapshot_id", None), snapshot_url=browse_snp_link, origin_url=origin_url, ) release_note_lines = [] if release["message"]: release_note_lines = release["message"].split("\n") vault_cooking = None rev_directory = None target_link = None if release["target_type"] == REVISION: target_link = gen_revision_link( release["target"], snapshot_context=snapshot_context, link_text=None, link_attrs=None, ) try: revision = archive.lookup_revision(release["target"]) rev_directory = revision["directory"] vault_cooking = 
{ "directory_context": True, "directory_id": rev_directory, "revision_context": True, "revision_id": release["target"], } except Exception as exc: sentry_sdk.capture_exception(exc) elif release["target_type"] == DIRECTORY: target_link = gen_directory_link( release["target"], snapshot_context=snapshot_context, link_text=None, link_attrs=None, ) try: # check directory exists archive.lookup_directory(release["target"]) vault_cooking = { "directory_context": True, "directory_id": release["target"], "revision_context": False, "revision_id": None, } except Exception as exc: sentry_sdk.capture_exception(exc) elif release["target_type"] == CONTENT: target_link = gen_content_link( release["target"], snapshot_context=snapshot_context, link_text=None, link_attrs=None, ) elif release["target_type"] == RELEASE: target_link = gen_release_link( release["target"], snapshot_context=snapshot_context, link_text=None, link_attrs=None, ) rev_directory_url = None if rev_directory is not None: if origin_info: rev_directory_url = reverse( "browse-origin-directory", query_params={ "origin_url": origin_info["url"], "release": release["name"], "snapshot": snapshot_id, }, ) elif snapshot_id: rev_directory_url = reverse( "browse-snapshot-directory", url_args={"snapshot_id": snapshot_id}, query_params={"release": release["name"]}, ) else: rev_directory_url = reverse( "browse-directory", url_args={"sha1_git": rev_directory} ) directory_link = None if rev_directory_url is not None: directory_link = gen_link(rev_directory_url, rev_directory) release["directory_link"] = directory_link release["target_link"] = target_link swh_objects = [SWHObjectInfo(object_type=RELEASE, object_id=sha1_git)] if snapshot_context: snapshot_id = snapshot_context["snapshot_id"] if snapshot_id: swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id)) swhids_info = get_swhids_info(swh_objects, snapshot_context) note_header = "None" if len(release_note_lines) > 0: note_header = release_note_lines[0] 
release["note_header"] = note_header release["note_body"] = "\n".join(release_note_lines[1:]) heading = "Release - %s" % release["name"] if snapshot_context: context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: context_found = "origin: %s" % origin_info["url"] heading += " - %s" % context_found return render( request, "browse/release.html", { "heading": heading, "swh_object_id": swhids_info[0]["swhid"], "swh_object_name": "Release", "swh_object_metadata": release_metadata, "release": release, "snapshot_context": snapshot_context, "show_actions": True, "breadcrumbs": None, "vault_cooking": vault_cooking, "top_right_link": None, "swhids_info": swhids_info, }, ) diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py index 493a4198..8216dd38 100644 --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -1,599 +1,588 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import json import textwrap from django.http import JsonResponse from django.shortcuts import render from django.template.defaultfilters import filesizeformat from django.utils.safestring import mark_safe from swh.model.identifiers import CONTENT, DIRECTORY, REVISION, SNAPSHOT, swhid from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( content_display_max_size, format_log_entries, gen_directory_link, gen_link, gen_person_mail_link, gen_revision_link, gen_revision_url, gen_snapshot_link, get_directory_entries, get_readme_to_display, get_revision_log_url, prepare_content_for_display, request_content, ) from swh.web.common import archive -from swh.web.common.exc import NotFoundExc, handle_view_exception +from 
swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import RevisionMetadata, SWHObjectInfo from swh.web.common.utils import ( format_utc_iso_date, gen_path_info, reverse, swh_object_icons, ) def _gen_content_url(revision, query_string, path, snapshot_context): if snapshot_context: query_params = snapshot_context["query_params"] query_params["path"] = path query_params["revision"] = revision["id"] content_url = reverse("browse-origin-content", query_params=query_params) else: content_path = "%s/%s" % (revision["directory"], path) content_url = reverse( "browse-content", url_args={"query_string": query_string}, query_params={"path": content_path}, ) return content_url def _gen_diff_link(idx, diff_anchor, link_text): if idx < _max_displayed_file_diffs: return gen_link(diff_anchor, link_text) else: return link_text # TODO: put in conf _max_displayed_file_diffs = 1000 def _gen_revision_changes_list(revision, changes, snapshot_context): """ Returns a HTML string describing the file changes introduced in a revision. As this string will be displayed in the browse revision view, links to adequate file diffs are also generated. Args: revision (str): hexadecimal representation of a revision identifier changes (list): list of file changes in the revision snapshot_context (dict): optional origin context used to reverse the content urls Returns: A string to insert in a revision HTML view. 
""" changes_msg = [] for i, change in enumerate(changes): hasher = hashlib.sha1() from_query_string = "" to_query_string = "" diff_id = "diff-" if change["from"]: from_query_string = "sha1_git:" + change["from"]["target"] diff_id += change["from"]["target"] + "-" + change["from_path"] diff_id += "-" if change["to"]: to_query_string = "sha1_git:" + change["to"]["target"] diff_id += change["to"]["target"] + change["to_path"] change["path"] = change["to_path"] or change["from_path"] url_args = { "from_query_string": from_query_string, "to_query_string": to_query_string, } query_params = {"path": change["path"]} change["diff_url"] = reverse( "diff-contents", url_args=url_args, query_params=query_params ) hasher.update(diff_id.encode("utf-8")) diff_id = hasher.hexdigest() change["id"] = diff_id diff_link = "#diff_" + diff_id if change["type"] == "modify": change["content_url"] = _gen_content_url( revision, to_query_string, change["to_path"], snapshot_context ) changes_msg.append( "modified: %s" % _gen_diff_link(i, diff_link, change["to_path"]) ) elif change["type"] == "insert": change["content_url"] = _gen_content_url( revision, to_query_string, change["to_path"], snapshot_context ) changes_msg.append( "new file: %s" % _gen_diff_link(i, diff_link, change["to_path"]) ) elif change["type"] == "delete": parent = archive.lookup_revision(revision["parents"][0]) change["content_url"] = _gen_content_url( parent, from_query_string, change["from_path"], snapshot_context ) changes_msg.append( "deleted: %s" % _gen_diff_link(i, diff_link, change["from_path"]) ) elif change["type"] == "rename": change["content_url"] = _gen_content_url( revision, to_query_string, change["to_path"], snapshot_context ) link_text = change["from_path"] + " → " + change["to_path"] changes_msg.append( "renamed: %s" % _gen_diff_link(i, diff_link, link_text) ) if not changes: changes_msg.append("No changes") return mark_safe("\n".join(changes_msg)) @browse_route( r"revision/(?P[0-9a-f]+)/diff/", 
view_name="diff-revision", checksum_args=["sha1_git"], ) def _revision_diff(request, sha1_git): """ Browse internal endpoint to compute revision diff """ - try: - revision = archive.lookup_revision(sha1_git) - snapshot_context = None - origin_url = request.GET.get("origin_url", None) - if not origin_url: - origin_url = request.GET.get("origin", None) - timestamp = request.GET.get("timestamp", None) - visit_id = request.GET.get("visit_id", None) - if origin_url: - snapshot_context = get_snapshot_context( - origin_url=origin_url, timestamp=timestamp, visit_id=visit_id - ) - except Exception as exc: - return handle_view_exception(request, exc) + revision = archive.lookup_revision(sha1_git) + snapshot_context = None + origin_url = request.GET.get("origin_url", None) + if not origin_url: + origin_url = request.GET.get("origin", None) + timestamp = request.GET.get("timestamp", None) + visit_id = request.GET.get("visit_id", None) + if origin_url: + snapshot_context = get_snapshot_context( + origin_url=origin_url, timestamp=timestamp, visit_id=visit_id + ) changes = archive.diff_revision(sha1_git) changes_msg = _gen_revision_changes_list(revision, changes, snapshot_context) diff_data = { "total_nb_changes": len(changes), "changes": changes[:_max_displayed_file_diffs], "changes_msg": changes_msg, } return JsonResponse(diff_data) NB_LOG_ENTRIES = 100 @browse_route( r"revision/(?P[0-9a-f]+)/log/", view_name="browse-revision-log", checksum_args=["sha1_git"], ) def revision_log_browse(request, sha1_git): """ Django view that produces an HTML display of the history log for a revision identified by its id. 
The url that points to it is :http:get:`/browse/revision/(sha1_git)/log/` """ - try: - origin_url = request.GET.get("origin_url") - snapshot_id = request.GET.get("snapshot") - snapshot_context = None - if origin_url or snapshot_id: - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=request.GET.get("timestamp"), - visit_id=request.GET.get("visit_id"), - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=sha1_git, - ) - per_page = int(request.GET.get("per_page", NB_LOG_ENTRIES)) - offset = int(request.GET.get("offset", 0)) - revs_ordering = request.GET.get("revs_ordering", "committer_date") - session_key = "rev_%s_log_ordering_%s" % (sha1_git, revs_ordering) - rev_log_session = request.session.get(session_key, None) - rev_log = [] - revs_walker_state = None - if rev_log_session: - rev_log = rev_log_session["rev_log"] - revs_walker_state = rev_log_session["revs_walker_state"] - - if len(rev_log) < offset + per_page: - revs_walker = archive.get_revisions_walker( - revs_ordering, - sha1_git, - max_revs=offset + per_page + 1, - state=revs_walker_state, - ) + origin_url = request.GET.get("origin_url") + snapshot_id = request.GET.get("snapshot") + snapshot_context = None + if origin_url or snapshot_id: + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=request.GET.get("timestamp"), + visit_id=request.GET.get("visit_id"), + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=sha1_git, + ) + per_page = int(request.GET.get("per_page", NB_LOG_ENTRIES)) + offset = int(request.GET.get("offset", 0)) + revs_ordering = request.GET.get("revs_ordering", "committer_date") + session_key = "rev_%s_log_ordering_%s" % (sha1_git, revs_ordering) + rev_log_session = request.session.get(session_key, None) + rev_log = [] + revs_walker_state = None + if rev_log_session: + rev_log = 
rev_log_session["rev_log"] + revs_walker_state = rev_log_session["revs_walker_state"] + + if len(rev_log) < offset + per_page: + revs_walker = archive.get_revisions_walker( + revs_ordering, + sha1_git, + max_revs=offset + per_page + 1, + state=revs_walker_state, + ) - rev_log += [rev["id"] for rev in revs_walker] - revs_walker_state = revs_walker.export_state() + rev_log += [rev["id"] for rev in revs_walker] + revs_walker_state = revs_walker.export_state() - revs = rev_log[offset : offset + per_page] - revision_log = archive.lookup_revision_multiple(revs) + revs = rev_log[offset : offset + per_page] + revision_log = archive.lookup_revision_multiple(revs) - request.session[session_key] = { - "rev_log": rev_log, - "revs_walker_state": revs_walker_state, - } - except Exception as exc: - return handle_view_exception(request, exc) + request.session[session_key] = { + "rev_log": rev_log, + "revs_walker_state": revs_walker_state, + } revs_ordering = request.GET.get("revs_ordering", "") prev_log_url = None if len(rev_log) > offset + per_page: prev_log_url = reverse( "browse-revision-log", url_args={"sha1_git": sha1_git}, query_params={ "per_page": per_page, "offset": offset + per_page, "revs_ordering": revs_ordering or None, }, ) next_log_url = None if offset != 0: next_log_url = reverse( "browse-revision-log", url_args={"sha1_git": sha1_git}, query_params={ "per_page": per_page, "offset": offset - per_page, "revs_ordering": revs_ordering or None, }, ) revision_log_data = format_log_entries(revision_log, per_page) swh_rev_id = swhid("revision", sha1_git) return render( request, "browse/revision-log.html", { "heading": "Revision history", "swh_object_id": swh_rev_id, "swh_object_name": "Revisions history", "swh_object_metadata": None, "revision_log": revision_log_data, "revs_ordering": revs_ordering, "next_log_url": next_log_url, "prev_log_url": prev_log_url, "breadcrumbs": None, "top_right_link": None, "snapshot_context": snapshot_context, "vault_cooking": None, 
"show_actions": True, "swhids_info": None, }, ) @browse_route( r"revision/(?P[0-9a-f]+)/", view_name="browse-revision", checksum_args=["sha1_git"], ) def revision_browse(request, sha1_git): """ Django view that produces an HTML display of a revision identified by its id. The url that points to it is :http:get:`/browse/revision/(sha1_git)/`. """ - try: - revision = archive.lookup_revision(sha1_git) - origin_info = None - snapshot_context = None - origin_url = request.GET.get("origin_url") - if not origin_url: - origin_url = request.GET.get("origin") - timestamp = request.GET.get("timestamp") - visit_id = request.GET.get("visit_id") - snapshot_id = request.GET.get("snapshot_id") - if not snapshot_id: - snapshot_id = request.GET.get("snapshot") - path = request.GET.get("path") - dir_id = None - dirs, files = None, None - content_data = {} - if origin_url: - try: - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=visit_id, - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=sha1_git, - ) - except NotFoundExc as e: - raw_rev_url = reverse( - "browse-revision", url_args={"sha1_git": sha1_git} - ) - error_message = ( - "The Software Heritage archive has a revision " - "with the hash you provided but the origin " - "mentioned in your request appears broken: %s. 
" - "Please check the URL and try again.\n\n" - "Nevertheless, you can still browse the revision " - "without origin information: %s" - % (gen_link(origin_url), gen_link(raw_rev_url)) - ) - if str(e).startswith("Origin"): - raise NotFoundExc(error_message) - else: - raise e - origin_info = snapshot_context["origin_info"] - snapshot_id = snapshot_context["snapshot_id"] - elif snapshot_id: - snapshot_context = get_snapshot_context(snapshot_id) - - if path: - file_info = archive.lookup_directory_with_path(revision["directory"], path) - if file_info["type"] == "dir": - dir_id = file_info["target"] + revision = archive.lookup_revision(sha1_git) + origin_info = None + snapshot_context = None + origin_url = request.GET.get("origin_url") + if not origin_url: + origin_url = request.GET.get("origin") + timestamp = request.GET.get("timestamp") + visit_id = request.GET.get("visit_id") + snapshot_id = request.GET.get("snapshot_id") + if not snapshot_id: + snapshot_id = request.GET.get("snapshot") + path = request.GET.get("path") + dir_id = None + dirs, files = None, None + content_data = {} + if origin_url: + try: + snapshot_context = get_snapshot_context( + snapshot_id=snapshot_id, + origin_url=origin_url, + timestamp=timestamp, + visit_id=visit_id, + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + revision_id=sha1_git, + ) + except NotFoundExc as e: + raw_rev_url = reverse("browse-revision", url_args={"sha1_git": sha1_git}) + error_message = ( + "The Software Heritage archive has a revision " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. 
" + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the revision " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_rev_url)) + ) + if str(e).startswith("Origin"): + raise NotFoundExc(error_message) else: - query_string = "sha1_git:" + file_info["target"] - content_data = request_content(query_string, raise_if_unavailable=False) + raise e + origin_info = snapshot_context["origin_info"] + snapshot_id = snapshot_context["snapshot_id"] + elif snapshot_id: + snapshot_context = get_snapshot_context(snapshot_id) + + if path: + file_info = archive.lookup_directory_with_path(revision["directory"], path) + if file_info["type"] == "dir": + dir_id = file_info["target"] else: - dir_id = revision["directory"] + query_string = "sha1_git:" + file_info["target"] + content_data = request_content(query_string, raise_if_unavailable=False) + else: + dir_id = revision["directory"] - if dir_id: - path = "" if path is None else (path + "/") - dirs, files = get_directory_entries(dir_id) - except Exception as exc: - return handle_view_exception(request, exc) + if dir_id: + path = "" if path is None else (path + "/") + dirs, files = get_directory_entries(dir_id) revision_metadata = RevisionMetadata( object_type=REVISION, object_id=sha1_git, revision=sha1_git, revision_url=gen_revision_link(sha1_git), author=revision["author"]["fullname"] if revision["author"] else "None", author_url=gen_person_mail_link(revision["author"]) if revision["author"] else "None", committer=revision["committer"]["fullname"] if revision["committer"] else "None", committer_url=gen_person_mail_link(revision["committer"]) if revision["committer"] else "None", committer_date=format_utc_iso_date(revision["committer_date"]), date=format_utc_iso_date(revision["date"]), directory=revision["directory"], directory_url=gen_directory_link(revision["directory"]), merge=revision["merge"], metadata=json.dumps( revision["metadata"], sort_keys=True, indent=4, 
separators=(",", ": ") ), parents=revision["parents"], synthetic=revision["synthetic"], type=revision["type"], snapshot=snapshot_id, snapshot_url=gen_snapshot_link(snapshot_id) if snapshot_id else None, origin_url=origin_url, ) message_lines = ["None"] if revision["message"]: message_lines = revision["message"].split("\n") parents = [] for p in revision["parents"]: parent_url = gen_revision_url(p, snapshot_context) parents.append({"id": p, "url": parent_url}) path_info = gen_path_info(path) query_params = snapshot_context["query_params"] if snapshot_context else {} breadcrumbs = [] breadcrumbs.append( { "name": revision["directory"][:7], "url": reverse( "browse-revision", url_args={"sha1_git": sha1_git}, query_params=query_params, ), } ) for pi in path_info: query_params["path"] = pi["path"] breadcrumbs.append( { "name": pi["name"], "url": reverse( "browse-revision", url_args={"sha1_git": sha1_git}, query_params=query_params, ), } ) vault_cooking = { "directory_context": False, "directory_id": None, "revision_context": True, "revision_id": sha1_git, } swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)] content = None content_size = None filename = None mimetype = None language = None readme_name = None readme_url = None readme_html = None readmes = {} error_code = 200 error_message = "" error_description = "" extra_context = dict(revision_metadata) extra_context["path"] = f"/{path}" if path else None if content_data: breadcrumbs[-1]["url"] = None content_size = content_data["length"] mimetype = content_data["mimetype"] if content_data["raw_data"]: content_display_data = prepare_content_for_display( content_data["raw_data"], content_data["mimetype"], path ) content = content_display_data["content_data"] language = content_display_data["language"] mimetype = content_display_data["mimetype"] if path: filename = path_info[-1]["name"] query_params["filename"] = filename filepath = "/".join(pi["name"] for pi in path_info[:-1]) extra_context["path"] = 
f"/{filepath}/" if filepath else "/" extra_context["filename"] = filename top_right_link = { "url": reverse( "browse-content-raw", url_args={"query_string": query_string}, query_params={"filename": filename}, ), "icon": swh_object_icons["content"], "text": "Raw File", } swh_objects.append( SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]) ) error_code = content_data["error_code"] error_message = content_data["error_message"] error_description = content_data["error_description"] else: for d in dirs: if d["type"] == "rev": d["url"] = reverse( "browse-revision", url_args={"sha1_git": d["target"]} ) else: query_params["path"] = path + d["name"] d["url"] = reverse( "browse-revision", url_args={"sha1_git": sha1_git}, query_params=query_params, ) for f in files: query_params["path"] = path + f["name"] f["url"] = reverse( "browse-revision", url_args={"sha1_git": sha1_git}, query_params=query_params, ) if f["length"] is not None: f["length"] = filesizeformat(f["length"]) if f["name"].lower().startswith("readme"): readmes[f["name"]] = f["checksums"]["sha1"] readme_name, readme_url, readme_html = get_readme_to_display(readmes) top_right_link = { "url": get_revision_log_url(sha1_git, snapshot_context), "icon": swh_object_icons["revisions history"], "text": "History", } vault_cooking["directory_context"] = True vault_cooking["directory_id"] = dir_id swh_objects.append(SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id)) query_params.pop("path", None) diff_revision_url = reverse( "diff-revision", url_args={"sha1_git": sha1_git}, query_params=query_params, ) if snapshot_id: swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id)) swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context) heading = "Revision - %s - %s" % ( sha1_git[:7], textwrap.shorten(message_lines[0], width=70), ) if snapshot_context: context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: context_found = "origin: %s" % 
origin_info["url"] heading += " - %s" % context_found return render( request, "browse/revision.html", { "heading": heading, "swh_object_id": swhids_info[0]["swhid"], "swh_object_name": "Revision", "swh_object_metadata": revision_metadata, "message_header": message_lines[0], "message_body": "\n".join(message_lines[1:]), "parents": parents, "snapshot_context": snapshot_context, "dirs": dirs, "files": files, "content": content, "content_size": content_size, "max_content_size": content_display_max_size, "filename": filename, "encoding": content_data.get("encoding"), "mimetype": mimetype, "language": language, "readme_name": readme_name, "readme_url": readme_url, "readme_html": readme_html, "breadcrumbs": breadcrumbs, "top_right_link": top_right_link, "vault_cooking": vault_cooking, "diff_revision_url": diff_revision_url, "show_actions": True, "swhids_info": swhids_info, "error_code": error_code, "error_message": error_message, "error_description": error_description, }, status=error_code, ) diff --git a/swh/web/common/exc.py b/swh/web/common/exc.py index f3c9550c..a68d6d18 100644 --- a/swh/web/common/exc.py +++ b/swh/web/common/exc.py @@ -1,150 +1,145 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import traceback import sentry_sdk -from django.http import HttpResponse from django.shortcuts import render from django.utils.html import escape from django.utils.safestring import mark_safe from swh.web.config import get_config class BadInputExc(ValueError): """Wrong request to the api. Example: Asking a content with the wrong identifier format. """ pass class NotFoundExc(Exception): """Good request to the api but no result were found. Example: Asking a content with the right identifier format but that content does not exist. 
""" pass class ForbiddenExc(Exception): """Good request to the api, forbidden result to return due to enforce policy. Example: Asking for a raw content which exists but whose mimetype is not text. """ pass class LargePayloadExc(Exception): """The input size is too large. Example: Asking to resolve 10000 SWHIDs when the limit is 1000. """ pass http_status_code_message = { 400: "Bad Request", 401: "Unauthorized", 403: "Access Denied", 404: "Resource not found", 413: "Payload Too Large", 500: "Internal Server Error", 501: "Not Implemented", 502: "Bad Gateway", 503: "Service unavailable", } def _generate_error_page(request, error_code, error_description): return render( request, "error.html", { "error_code": error_code, "error_message": http_status_code_message[error_code], "error_description": mark_safe(error_description), }, status=error_code, ) def swh_handle400(request, exception=None): """ Custom Django HTTP error 400 handler for swh-web. """ error_description = ( "The server cannot process the request to %s due to " "something that is perceived to be a client error." % escape(request.META["PATH_INFO"]) ) return _generate_error_page(request, 400, error_description) def swh_handle403(request, exception=None): """ Custom Django HTTP error 403 handler for swh-web. """ error_description = "The resource %s requires an authentication." % escape( request.META["PATH_INFO"] ) return _generate_error_page(request, 403, error_description) def swh_handle404(request, exception=None): """ Custom Django HTTP error 404 handler for swh-web. """ error_description = "The resource %s could not be found on the server." % escape( request.META["PATH_INFO"] ) return _generate_error_page(request, 404, error_description) def swh_handle500(request): """ Custom Django HTTP error 500 handler for swh-web. """ error_description = ( "An unexpected condition was encountered when " "requesting resource %s." 
% escape(request.META["PATH_INFO"]) ) return _generate_error_page(request, 500, error_description) -def handle_view_exception(request, exc, html_response=True): +def handle_view_exception(request, exc): """ Function used to generate an error page when an exception was raised inside a swh-web browse view. """ sentry_sdk.capture_exception(exc) error_code = 500 error_description = "%s: %s" % (type(exc).__name__, str(exc)) if get_config()["debug"]: error_description = traceback.format_exc() if isinstance(exc, BadInputExc): error_code = 400 if isinstance(exc, ForbiddenExc): error_code = 403 if isinstance(exc, NotFoundExc): error_code = 404 - if html_response: - return _generate_error_page(request, error_code, error_description) - else: - return HttpResponse( - error_description, content_type="text/plain", status=error_code - ) + + return _generate_error_page(request, error_code, error_description) diff --git a/swh/web/common/middlewares.py b/swh/web/common/middlewares.py index d3f55bcf..0c589fe0 100644 --- a/swh/web/common/middlewares.py +++ b/swh/web/common/middlewares.py @@ -1,73 +1,91 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information + from htmlmin import minify import sentry_sdk +from swh.web.common.exc import handle_view_exception from swh.web.common.utils import prettify_html class HtmlPrettifyMiddleware(object): """ Django middleware for prettifying generated HTML in development mode.
""" def __init__(self, get_response): self.get_response = get_response def __call__(self, request): response = self.get_response(request) if "text/html" in response.get("Content-Type", ""): if hasattr(response, "content"): content = response.content response.content = prettify_html(content) elif hasattr(response, "streaming_content"): content = b"".join(response.streaming_content) response.streaming_content = prettify_html(content) return response class HtmlMinifyMiddleware(object): """ Django middleware for minifying generated HTML in production mode. """ def __init__(self, get_response=None): self.get_response = get_response def __call__(self, request): response = self.get_response(request) if "text/html" in response.get("Content-Type", ""): try: minified_html = minify( response.content.decode("utf-8"), convert_charrefs=False ) response.content = minified_html.encode("utf-8") except Exception as exc: sentry_sdk.capture_exception(exc) return response class ThrottlingHeadersMiddleware(object): """ Django middleware for inserting rate limiting related headers in HTTP response. """ def __init__(self, get_response=None): self.get_response = get_response def __call__(self, request): resp = self.get_response(request) if "RateLimit-Limit" in request.META: resp["X-RateLimit-Limit"] = request.META["RateLimit-Limit"] if "RateLimit-Remaining" in request.META: resp["X-RateLimit-Remaining"] = request.META["RateLimit-Remaining"] if "RateLimit-Reset" in request.META: resp["X-RateLimit-Reset"] = request.META["RateLimit-Reset"] return resp + + +class ExceptionMiddleware(object): + """ + Django middleware for handling uncaught exceptions raised when + processing a view.
+ """ + + def __init__(self, get_response=None): + self.get_response = get_response + + def __call__(self, request): + return self.get_response(request) + + def process_exception(self, request, exception): + return handle_view_exception(request, exception) diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py index 4a3ba8d1..ed62c0ef 100644 --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -1,281 +1,282 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django common settings for swh-web. """ import os import sys from typing import Any, Dict from swh.web.config import get_config swh_web_config = get_config() # Build paths inside the project like this: os.path.join(BASE_DIR, ...) PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = swh_web_config["secret_key"] # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = swh_web_config["debug"] DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config["debug"] ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config["allowed_hosts"] # Application definition INSTALLED_APPS = [ "django.contrib.admin", "django.contrib.auth", "django.contrib.contenttypes", "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", "rest_framework", "swh.web.common", "swh.web.api", "swh.web.auth", "swh.web.browse", "webpack_loader", "django_js_reverse", "corsheaders", ] MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "corsheaders.middleware.CorsMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "swh.web.auth.middlewares.OIDCSessionRefreshMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "swh.web.common.middlewares.ThrottlingHeadersMiddleware", + "swh.web.common.middlewares.ExceptionMiddleware", ] # Compress all assets (static ones and dynamically generated html) # served by django in a local development environment context. # In a production environment, assets compression will be directly # handled by web servers like apache or nginx. 
if swh_web_config["serve_assets"]: MIDDLEWARE.insert(0, "django.middleware.gzip.GZipMiddleware") ROOT_URLCONF = "swh.web.urls" TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", "DIRS": [os.path.join(PROJECT_DIR, "../templates")], "APP_DIRS": True, "OPTIONS": { "context_processors": [ "django.template.context_processors.debug", "django.template.context_processors.request", "django.contrib.auth.context_processors.auth", "django.contrib.messages.context_processors.messages", "swh.web.common.utils.context_processor", ], "libraries": {"swh_templatetags": "swh.web.common.swh_templatetags",}, }, }, ] DATABASES = { "default": { "ENGINE": "django.db.backends.sqlite3", "NAME": swh_web_config["development_db"], } } # Password validation # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",}, {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",}, {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",}, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ LANGUAGE_CODE = "en-us" TIME_ZONE = "UTC" USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = "/static/" # static folder location when swh-web has been installed with pip STATIC_DIR = os.path.join(sys.prefix, "share/swh/web/static") if not os.path.exists(STATIC_DIR): # static folder location when developping swh-web STATIC_DIR = os.path.join(PROJECT_DIR, "../../../static") STATICFILES_DIRS = [STATIC_DIR] INTERNAL_IPS = ["127.0.0.1"] throttle_rates = {} http_requests = ["GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"] throttling = swh_web_config["throttling"] for limiter_scope, limiter_conf 
in throttling["scopes"].items(): if "default" in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope] = limiter_conf["limiter_rate"]["default"] # for backward compatibility else: throttle_rates[limiter_scope] = limiter_conf["limiter_rate"] # register sub scopes specific for HTTP request types for http_request in http_requests: if http_request in limiter_conf["limiter_rate"]: throttle_rates[limiter_scope + "_" + http_request.lower()] = limiter_conf[ "limiter_rate" ][http_request] REST_FRAMEWORK: Dict[str, Any] = { "DEFAULT_RENDERER_CLASSES": ( "rest_framework.renderers.JSONRenderer", "swh.web.api.renderers.YAMLRenderer", "rest_framework.renderers.TemplateHTMLRenderer", ), "DEFAULT_THROTTLE_CLASSES": ("swh.web.api.throttling.SwhWebRateThrottle",), "DEFAULT_THROTTLE_RATES": throttle_rates, "DEFAULT_AUTHENTICATION_CLASSES": [ "rest_framework.authentication.SessionAuthentication", "swh.web.auth.backends.OIDCBearerTokenAuthentication", ], "EXCEPTION_HANDLER": "swh.web.api.apiresponse.error_response_handler", } LOGGING = { "version": 1, "disable_existing_loggers": False, "filters": { "require_debug_false": {"()": "django.utils.log.RequireDebugFalse",}, "require_debug_true": {"()": "django.utils.log.RequireDebugTrue",}, }, "formatters": { "request": { "format": "[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "simple": { "format": "[%(asctime)s] [%(levelname)s] %(message)s", "datefmt": "%d/%b/%Y %H:%M:%S", }, "verbose": { "format": ( "[%(asctime)s] [%(levelname)s] %(name)s.%(funcName)s:%(lineno)s " "- %(message)s" ), "datefmt": "%d/%b/%Y %H:%M:%S", }, }, "handlers": { "console": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "simple", }, "file": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "simple", }, "file_request": { "level": "WARNING", 
"filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "request", }, "console_verbose": { "level": "DEBUG", "filters": ["require_debug_true"], "class": "logging.StreamHandler", "formatter": "verbose", }, "file_verbose": { "level": "WARNING", "filters": ["require_debug_false"], "class": "logging.FileHandler", "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), "formatter": "verbose", }, "null": {"class": "logging.NullHandler",}, }, "loggers": { "": { "handlers": ["console_verbose", "file_verbose"], "level": "DEBUG" if DEBUG else "WARNING", }, "django": { "handlers": ["console"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.request": { "handlers": ["file_request"], "level": "DEBUG" if DEBUG else "WARNING", "propagate": False, }, "django.db.backends": {"handlers": ["null"], "propagate": False}, "django.utils.autoreload": {"level": "INFO",}, }, } WEBPACK_LOADER = { "DEFAULT": { "CACHE": False, "BUNDLE_DIR_NAME": "./", "STATS_FILE": os.path.join(STATIC_DIR, "webpack-stats.json"), "POLL_INTERVAL": 0.1, "TIMEOUT": None, "IGNORE": [".+\\.hot-update.js", ".+\\.map"], } } LOGIN_URL = "/admin/login/" LOGIN_REDIRECT_URL = "admin" SESSION_ENGINE = "django.contrib.sessions.backends.cache" CACHES = { "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}, } JS_REVERSE_JS_MINIFY = False CORS_ORIGIN_ALLOW_ALL = True CORS_URLS_REGEX = r"^/badge/.*$" AUTHENTICATION_BACKENDS = [ "django.contrib.auth.backends.ModelBackend", "swh.web.auth.backends.OIDCAuthorizationCodePKCEBackend", ] diff --git a/swh/web/tests/common/test_middlewares.py b/swh/web/tests/common/test_middlewares.py new file mode 100644 index 00000000..2cca166c --- /dev/null +++ b/swh/web/tests/common/test_middlewares.py @@ -0,0 +1,41 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# 
License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from hypothesis import given +import pytest + +from django.test import modify_settings + +from swh.web.common.utils import reverse +from swh.web.tests.strategies import snapshot + + +@modify_settings( + MIDDLEWARE={"remove": ["swh.web.common.middlewares.ExceptionMiddleware"]} +) +@given(snapshot()) +def test_exception_middleware_disabled(client, mocker, snapshot): + mock_browse_snapshot_directory = mocker.patch( + "swh.web.browse.views.snapshot.browse_snapshot_directory" + ) + mock_browse_snapshot_directory.side_effect = Exception("Something went wrong") + + url = reverse("browse-snapshot-directory", url_args={"snapshot_id": snapshot}) + + with pytest.raises(Exception, match="Something went wrong"): + client.get(url) + + +@given(snapshot()) +def test_exception_middleware_enabled(client, mocker, snapshot): + mock_browse_snapshot_directory = mocker.patch( + "swh.web.browse.views.snapshot.browse_snapshot_directory" + ) + mock_browse_snapshot_directory.side_effect = Exception("Something went wrong") + + url = reverse("browse-snapshot-directory", url_args={"snapshot_id": snapshot}) + + resp = client.get(url) + assert resp.status_code == 500