@browse_route(
    r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/raw/",
    view_name="browse-content-raw",
    checksum_args=["query_string"],
)
def content_raw(request, query_string):
    """Django view returning the raw bytes of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/raw/`

    Query parameters read from ``request.GET``:
        re_encode: boolean-like string forwarded to ``request_content``
            (defaults to ``"false"``)
        filename: name put in the ``Content-disposition`` header; when
            missing or empty, ``<algo>_<hex checksum>`` is used instead

    Contents whose mimetype starts with ``text/`` or equals
    ``inode/x-empty`` are answered as ``text/plain``; every other content
    is answered as an ``application/octet-stream`` attachment.
    """
    wants_re_encoding = bool(strtobool(request.GET.get("re_encode", "false")))
    algo, raw_checksum = query.parse_hash(query_string)
    hex_checksum = hash_to_hex(raw_checksum)
    content_data = request_content(
        query_string, max_size=None, re_encode=wants_re_encoding
    )

    # Fall back to a name derived from the hash when none was requested.
    filename = request.GET.get("filename", None) or "%s_%s" % (algo, hex_checksum)

    mimetype = content_data["mimetype"]
    if mimetype.startswith("text/") or mimetype == "inode/x-empty":
        content_type = "text/plain"
        disposition = "filename=%s" % filename
    else:
        content_type = "application/octet-stream"
        disposition = "attachment; filename=%s" % filename

    response = HttpResponse(content_data["raw_data"], content_type=content_type)
    response["Content-disposition"] = disposition
    return response
""" diff_data = {} content_from = None content_to = None content_from_size = 0 content_to_size = 0 content_from_lines = [] content_to_lines = [] force = request.GET.get("force", "false") path = request.GET.get("path", None) language = "nohighlight" force = bool(strtobool(force)) if from_query_string == to_query_string: diff_str = "File renamed without changes" else: try: text_diff = True if from_query_string: content_from = request_content(from_query_string, max_size=None) content_from_display_data = prepare_content_for_display( content_from["raw_data"], content_from["mimetype"], path ) language = content_from_display_data["language"] content_from_size = content_from["length"] if not ( content_from["mimetype"].startswith("text/") or content_from["mimetype"] == "inode/x-empty" ): text_diff = False if text_diff and to_query_string: content_to = request_content(to_query_string, max_size=None) content_to_display_data = prepare_content_for_display( content_to["raw_data"], content_to["mimetype"], path ) language = content_to_display_data["language"] content_to_size = content_to["length"] if not ( content_to["mimetype"].startswith("text/") or content_to["mimetype"] == "inode/x-empty" ): text_diff = False diff_size = abs(content_to_size - content_from_size) if not text_diff: diff_str = "Diffs are not generated for non textual content" language = "nohighlight" elif not force and diff_size > _auto_diff_size_limit: diff_str = "Large diffs are not automatically computed" language = "nohighlight" else: if content_from: content_from_lines = ( content_from["raw_data"].decode("utf-8").splitlines(True) ) if content_from_lines and content_from_lines[-1][-1] != "\n": content_from_lines[-1] += "[swh-no-nl-marker]\n" if content_to: content_to_lines = ( content_to["raw_data"].decode("utf-8").splitlines(True) ) if content_to_lines and content_to_lines[-1][-1] != "\n": content_to_lines[-1] += "[swh-no-nl-marker]\n" diff_lines = difflib.unified_diff(content_from_lines, content_to_lines) 
@browse_route(
    r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/",
    r"content/",
    view_name="browse-content",
    checksum_args=["query_string"],
)
def content_display(request, query_string=None):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The URLs that points to it are
    :http:get:`/browse/content/[(algo_hash):](hash)/`
    :http:get:`/browse/content/`

    When ``query_string`` is ``None`` the content is resolved from the
    request's query parameters and the client is redirected to the
    hash-based URL. Otherwise the ``browse/content.html`` template is
    rendered; a ``NotFoundExc`` raised while fetching the content is turned
    into a 404 status on that same template.
    """
    params = request.GET

    if query_string is None:
        # this case happens when redirected from origin/content or snapshot/content
        resolved = _get_content_from_request(request)
        target_url = reverse(
            "browse-content",
            url_args={"query_string": f"sha1_git:{resolved['target']}"},
            query_params=request.GET,
        )
        return redirect(target_url)

    # The parsed values are not used further in this view.
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)

    origin_url = params.get("origin_url") or params.get("origin")
    selected_language = params.get("language")
    snapshot_id = params.get("snapshot") or params.get("snapshot_id")
    path = params.get("path")

    status_code = 200
    error_description = None
    content_data = {}
    try:
        content_data = request_content(query_string)
    except NotFoundExc as not_found:
        status_code = 404
        error_description = f"NotFoundExc: {str(not_found)}"

    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                timestamp=params.get("timestamp"),
                visit_id=params.get("visit_id"),
                branch_name=params.get("branch"),
                release_name=params.get("release"),
                revision_id=params.get("revision"),
                path=path,
                browse_context="content",
            )
        except NotFoundExc as e:
            if not str(e).startswith("Origin"):
                raise e
            # The content exists but the origin does not: point the user to
            # the origin-less URL of the content.
            raw_cnt_url = reverse(
                "browse-content", url_args={"query_string": query_string}
            )
            error_message = (
                "The Software Heritage archive has a content "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the content "
                "without origin information: %s"
                % (gen_link(origin_url), gen_link(raw_cnt_url))
            )
            raise NotFoundExc(error_message)

    content = language = mimetype = None
    raw_data = content_data.get("raw_data")
    if raw_data is not None:
        display = prepare_content_for_display(
            raw_data, content_data["mimetype"], path
        )
        content = display["content_data"]
        language = display["language"]
        mimetype = display["mimetype"]

    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language

    available_languages = (
        highlightjs.get_supported_languages()
        if mimetype and "text/" in mimetype
        else None
    )

    filename = None
    directory_id = None
    root_dir = snapshot_context.get("root_directory") if snapshot_context else None

    # NOTE: this is the snapshot context's own dict (not a copy); the
    # breadcrumb code below pops/sets its "path" key in place.
    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []

    if path:
        segments = path.split("/")
        root_dir = root_dir or segments[0]
        filename = segments[-1]
        if root_dir != path:
            # Reduce path to the directory part relative to the root directory.
            path = path.replace(root_dir + "/", "")
            path = path[: -len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            root_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": root_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                sub_dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": sub_dir_url})
        breadcrumbs.append({"name": filename, "url": None})

    if root_dir == path:
        root_dir = None
    elif path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    else:
        directory_id = root_dir

    # From here on query_params only carries the raw-content filename.
    query_params = {"filename": filename}

    content_checksums = content_data.get("checksums", {})
    sha1_git = content_checksums.get("sha1_git")

    content_url = reverse("browse-content", url_args={"query_string": query_string})

    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )

    content_metadata = ContentMetadata(
        object_type=ObjectType.CONTENT,
        object_id=sha1_git,
        sha1=content_checksums.get("sha1"),
        sha1_git=sha1_git,
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=content_data.get("length", 0),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )

    # Collect (type, id) pairs first, in the same order the objects must be listed.
    typed_ids = []
    if content_checksums:
        typed_ids.append((ObjectType.CONTENT, sha1_git))
    if directory_id:
        typed_ids.append((ObjectType.DIRECTORY, directory_id))
    if snapshot_context:
        # A snapshot context may come without a revision: skip it in that case.
        if snapshot_context["revision_id"]:
            typed_ids.append((ObjectType.REVISION, snapshot_context["revision_id"]))
        typed_ids.append((ObjectType.SNAPSHOT, snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            typed_ids.append((ObjectType.RELEASE, snapshot_context["release_id"]))

    swh_objects = [
        SWHObjectInfo(object_type=object_type, object_id=object_id)
        for object_type, object_id in typed_ids
    ]

    swhids_info = get_swhids_info(
        swh_objects, snapshot_context, extra_context=content_metadata,
    )

    heading = "Content - %s" % sha1_git
    if breadcrumbs:
        heading += " - %s" % "/".join(bc["name"] for bc in breadcrumbs)

    template_context = {
        "heading": heading,
        "swh_object_id": swhids_info[0]["swhid"] if swhids_info else "",
        "swh_object_name": "Content",
        "swh_object_metadata": content_metadata,
        "content": content,
        "content_size": content_data.get("length"),
        "max_content_size": content_display_max_size,
        "filename": filename,
        "encoding": content_data.get("encoding"),
        "mimetype": mimetype,
        "language": language,
        "available_languages": available_languages,
        "breadcrumbs": breadcrumbs,
        "top_right_link": {
            "url": content_raw_url,
            "icon": swh_object_icons["content"],
            "text": "Raw File",
        },
        "snapshot_context": snapshot_context,
        "vault_cooking": None,
        "show_actions": True,
        "swhids_info": swhids_info,
        "error_code": status_code,
        "error_message": http_status_code_message.get(status_code),
        "error_description": error_description,
    }
    return render(
        request, "browse/content.html", template_context, status=status_code
    )
def _directory_browse(request, sha1_git, path=None):
    """Render the ``browse/directory.html`` page for a directory.

    Args:
        request: input django http request
        sha1_git: identifier of the root directory to browse
        path: optional path of a sub-directory, relative to the root
            directory, whose entries must be displayed

    Returns:
        The rendered HTML response. When ``path`` cannot be found in the
        root directory, the same template is rendered with a 404 status
        and empty entry lists.
    """
    root_sha1_git = sha1_git
    status_code = 200
    error_description = None

    if path:
        try:
            sha1_git = archive.lookup_directory_with_path(sha1_git, path)["target"]
        except NotFoundExc as not_found:
            status_code = 404
            error_description = f"NotFoundExc: {str(not_found)}"
            sha1_git = None

    dirs, files = [], []
    if sha1_git is not None:
        dirs, files = get_directory_entries(sha1_git)

    origin_url = request.GET.get("origin_url") or request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")

    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
            )
        except NotFoundExc as e:
            if not str(e).startswith("Origin"):
                raise e
            # The directory exists but the origin does not: point the user
            # to the origin-less URL of the directory.
            raw_dir_url = reverse("browse-directory", url_args={"sha1_git": sha1_git})
            error_message = (
                "The Software Heritage archive has a directory "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the directory "
                "without origin information: %s"
                % (gen_link(origin_url), gen_link(raw_dir_url))
            )
            raise NotFoundExc(error_message)

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    def _directory_url(sub_path):
        # URL of a (sub-)directory of the browsed root directory.
        return reverse(
            "browse-directory",
            url_args={"sha1_git": root_sha1_git},
            query_params={**query_params, "path": sub_path},
        )

    breadcrumbs = [{"name": root_sha1_git[:7], "url": _directory_url(None)}]
    breadcrumbs.extend(
        {"name": pi["name"], "url": _directory_url(pi["path"])} for pi in path_info
    )

    # Prefix prepended to entry names to get their path from the root directory.
    path_prefix = "" if path is None else (path + "/")

    for dir_entry in dirs:
        if dir_entry["type"] == "rev":
            dir_entry["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": dir_entry["target"]},
                query_params=query_params,
            )
        else:
            dir_entry["url"] = _directory_url(path_prefix + dir_entry["name"])

    sum_file_sizes = 0
    readmes = {}

    for file_entry in files:
        file_entry["url"] = reverse(
            "browse-content",
            url_args={"query_string": "sha1_git:" + file_entry["target"]},
            query_params={
                **query_params,
                "path": root_sha1_git + "/" + path_prefix + file_entry["name"],
            },
        )
        if file_entry["length"] is not None:
            sum_file_sizes += file_entry["length"]
        if file_entry["name"].lower().startswith("readme"):
            readmes[file_entry["name"]] = file_entry["checksums"]["sha1"]

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    dir_metadata = DirectoryMetadata(
        object_type=ObjectType.DIRECTORY,
        object_id=sha1_git,
        directory=root_sha1_git,
        nb_files=len(files),
        nb_dirs=len(dirs),
        sum_file_sizes=sum_file_sizes,
        root_directory=root_sha1_git,
        path=f"/{path_prefix}" if path_prefix else None,
        revision=None,
        revision_found=None,
        release=None,
        snapshot=None,
    )

    vault_cooking = {
        "directory_context": True,
        "directory_swhid": f"swh:1:dir:{sha1_git}",
        "revision_context": False,
        "revision_swhid": None,
    }

    # Collect (type, id) pairs first, in the same order the objects must be listed.
    typed_ids = [(ObjectType.DIRECTORY, sha1_git)]
    if snapshot_context:
        # A snapshot context may come without a revision: skip it in that case.
        if snapshot_context["revision_id"]:
            typed_ids.append((ObjectType.REVISION, snapshot_context["revision_id"]))
        typed_ids.append((ObjectType.SNAPSHOT, snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            typed_ids.append((ObjectType.RELEASE, snapshot_context["release_id"]))

    swh_objects = [
        SWHObjectInfo(object_type=object_type, object_id=object_id)
        for object_type, object_id in typed_ids
    ]

    swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata)

    heading = "Directory - %s" % sha1_git
    if breadcrumbs:
        heading += " - %s" % ("/".join(bc["name"] for bc in breadcrumbs) + "/")

    top_right_link = None
    has_history = (
        snapshot_context is not None
        and not snapshot_context["is_empty"]
        and snapshot_context["revision_id"] is not None
    )
    if has_history:
        top_right_link = {
            "url": reverse(
                "browse-revision-log",
                url_args={"sha1_git": snapshot_context["revision_id"]},
                query_params=query_params,
            ),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

    template_context = {
        "heading": heading,
        "swh_object_id": swhids_info[0]["swhid"],
        "swh_object_name": "Directory",
        "swh_object_metadata": dir_metadata,
        "dirs": dirs,
        "files": files,
        "breadcrumbs": breadcrumbs,
        "top_right_link": top_right_link,
        "readme_name": readme_name,
        "readme_url": readme_url,
        "readme_html": readme_html,
        "snapshot_context": snapshot_context,
        "vault_cooking": vault_cooking,
        "show_actions": True,
        "swhids_info": swhids_info,
        "error_code": status_code,
        "error_message": http_status_code_message.get(status_code),
        "error_description": error_description,
    }
    return render(
        request, "browse/directory.html", template_context, status=status_code
    )
@browse_route(
    r"directory/resolve/content-path/(?P<sha1_git>[0-9a-f]+)/",
    view_name="browse-directory-resolve-content-path",
    checksum_args=["sha1_git"],
)
def _directory_resolve_content_path(request, sha1_git):
    """
    Internal endpoint redirecting to data url for a specific file path
    relative to a root directory.

    Args:
        request: input django http request; the file path is read from its
            ``path`` query parameter
        sha1_git: identifier of the root directory the path is relative to

    Returns:
        A redirection to the raw content URL when the path resolves to a
        file, an empty HTTP 404 response in every other case (missing
        ``path`` parameter, path escaping the root directory, path that is
        not a file, or any error raised during the lookup).
    """
    raw_path = request.GET.get("path")
    if raw_path is None:
        # A missing parameter is a client error, not something to report:
        # os.path.normpath(None) would otherwise raise a TypeError.
        return HttpResponse(status=404)

    path = os.path.normpath(raw_path)
    # After normalisation a path escaping the root directory is either
    # exactly ".." or starts with "../"; the prefix test alone misses the
    # former (e.g. "../" normalises to "..").
    if path == ".." or path.startswith("../"):
        return HttpResponse(status=404)

    try:
        dir_info = archive.lookup_directory_with_path(sha1_git, path)
        if dir_info["type"] == "file":
            sha1 = dir_info["checksums"]["sha1"]
            data_url = reverse("browse-content-raw", url_args={"query_string": sha1})
            return redirect(data_url)
    except Exception as exc:
        # Best-effort endpoint: report the failure and answer 404 below.
        sentry_sdk.capture_exception(exc)
    return HttpResponse(status=404)
def test_content_view_no_highlight(
    client, archive_data, content_application_no_highlight, content_text_no_highlight
):
    """Contents without a highlight.js language are rendered with the
    ``nohighlight`` class, along with their raw URL and SWHID."""
    for content in (content_application_no_highlight, content_text_no_highlight):
        sha1 = content["sha1"]
        sha1_git = content["sha1_git"]

        url = reverse("browse-content", url_args={"query_string": sha1})
        url_raw = reverse("browse-content-raw", url_args={"query_string": sha1})

        resp = check_html_get_response(
            client, url, status_code=200, template_used="browse/content.html"
        )

        expected_display = _process_content_for_display(archive_data, content)

        swh_cnt_id = gen_swhid(ObjectType.CONTENT, sha1_git)
        swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})

        expected_fragments = (
            '<code class="nohighlight">',
            escape(expected_display["content_data"]),
            url_raw,
            swh_cnt_id,
            swh_cnt_id_url,
        )
        for fragment in expected_fragments:
            assert_contains(resp, fragment)
def test_content_view_image(client, archive_data, content_image_type):
    """An image content is embedded in the page as a base64 data URL and
    the page links to its raw URL."""
    url_args = {"query_string": content_image_type["sha1"]}
    url = reverse("browse-content", url_args=url_args)
    url_raw = reverse("browse-content-raw", url_args=url_args)

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    expected_display = _process_content_for_display(archive_data, content_image_type)
    expected_img_tag = '<img src="data:%s;base64,%s"/>' % (
        expected_display["mimetype"],
        expected_display["content_data"],
    )

    assert_contains(resp, expected_img_tag)
    assert_contains(resp, url_raw)
def test_content_view_text_with_path(client, archive_data, content_text):
    """Browsing a content with a ``path`` query parameter displays
    breadcrumbs and a contextual SWHID; a bare filename displays no
    breadcrumbs and an unknown path gives a 404 error page."""
    sha1 = content_text["sha1"]
    full_path = content_text["path"]

    url = reverse(
        "browse-content",
        url_args={"query_string": sha1},
        query_params={"path": full_path},
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    assert_contains(resp, '<nav class="bread-crumbs')

    expected_display = _process_content_for_display(archive_data, content_text)
    if expected_display["mimetype"].startswith("text/"):
        assert_contains(resp, '<code class="%s">' % content_text["hljs_language"])
        assert_contains(resp, escape(expected_display["content_data"]))

    path_parts = full_path.split("/")
    root_dir_sha1 = path_parts[0]
    filename = path_parts[-1]
    # Directory part of the path, relative to the root directory.
    dir_path = full_path.replace(root_dir_sha1 + "/", "").replace(filename, "")

    swh_cnt_id = gen_swhid(
        ObjectType.CONTENT,
        content_text["sha1_git"],
        metadata={
            "anchor": gen_swhid(ObjectType.DIRECTORY, root_dir_sha1),
            "path": f"/{dir_path}{filename}",
        },
    )
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id)
    assert_contains(resp, swh_cnt_id_url)

    path_info = gen_path_info(dir_path)
    root_dir_url = reverse("browse-directory", url_args={"sha1_git": root_dir_sha1})

    assert_contains(resp, '<li class="swh-path">', count=len(path_info) + 1)
    assert_contains(resp, f'<a href="{root_dir_url}">{root_dir_sha1[:7]}</a>')

    for crumb in path_info:
        crumb_url = reverse(
            "browse-directory",
            url_args={"sha1_git": root_dir_sha1},
            query_params={"path": crumb["path"]},
        )
        assert_contains(resp, f'<a href="{crumb_url}">{crumb["name"]}</a>')

    assert_contains(resp, f"<li>{filename}</li>")

    url_raw = reverse(
        "browse-content-raw",
        url_args={"query_string": sha1},
        query_params={"filename": filename},
    )
    assert_contains(resp, url_raw)

    # A path reduced to the filename: no breadcrumbs expected.
    url = reverse(
        "browse-content",
        url_args={"query_string": sha1},
        query_params={"path": filename},
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    assert_not_contains(resp, '<nav class="bread-crumbs')

    # A path that does not exist in the root directory: error page expected.
    url = reverse(
        "browse-content",
        url_args={"query_string": sha1},
        query_params={"path": "%s/foo/bar/baz" % root_dir_sha1},
    )
    resp = check_html_get_response(
        client, url, status_code=404, template_used="error.html"
    )
@pytest.mark.django_db
@pytest.mark.parametrize("staff_user_logged_in", [False, True])
def test_content_request_errors(
    client, staff_user, invalid_sha1, unknown_content, staff_user_logged_in
):
    """An invalid hash gives a 400 error page, an unknown content gives a
    404 on the content page, whether or not a staff user is logged in."""
    if staff_user_logged_in:
        client.force_login(staff_user)

    # (query string, expected status code, expected template)
    expectations = (
        (invalid_sha1, 400, "error.html"),
        (unknown_content["sha1"], 404, "browse/content.html"),
    )
    for query_string, expected_status, expected_template in expectations:
        url = reverse("browse-content", url_args={"query_string": query_string})
        check_html_get_response(
            client, url, status_code=expected_status, template_used=expected_template
        )
def test_content_too_large(client, mocker):
    """A content returned without raw data (``raw_data`` is ``None``) is
    reported as too large to display, with a link to its raw URL."""
    mock_request_content = mocker.patch("swh.web.browse.views.content.request_content")

    content_sha1 = "8624bcdae55baeef00cd11d5dfcfa60f68710a02"
    stub_checksums = {
        "sha1": content_sha1,
        "sha1_git": "94a9ed024d3859793618152ea559a168bbcbb5e2",
        "sha256": (
            "8ceb4b9ee5adedde47b31e975c1d90c73ad27b6b16" "5a1dcd80c7c545eb65b903"
        ),
        "blake2s256": (
            "38702b7168c7785bfe748b51b45d9856070ba90" "f9dc6d90f2ea75d4356411ffe"
        ),
    }
    mock_request_content.return_value = {
        "checksums": stub_checksums,
        "length": 30000000,
        "raw_data": None,
        "mimetype": "text/plain",
        "encoding": "us-ascii",
        "language": "not detected",
        "licenses": "GPL",
        "error_code": 200,
        "error_message": "",
        "error_description": "",
    }

    url_args = {"query_string": content_sha1}
    url = reverse("browse-content", url_args=url_args)
    url_raw = reverse("browse-content-raw", url_args=url_args)

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    assert_contains(resp, "Content is too large to be displayed")
    assert_contains(resp, url_raw)
def test_content_origin_snapshot_branch_browse(
    client, archive_data, origin_with_multiple_visits
):
    """Browse a file from a randomly picked branch in an origin snapshot context.

    A visit of the origin, one branch of its snapshot and one file of the
    directory targeted by the branch revision are drawn at random. The content
    view must render the snapshot navigation, the file and branch names, and
    the contextual SWHIDs of the content, directory, revision and snapshot.
    """
    origin_url = origin_with_multiple_visits["url"]

    # Random draws happen in the same order as before: visit, branch, file
    visit = random.choice(archive_data.origin_visit_get(origin_url))
    snapshot = archive_data.snapshot_get(visit["snapshot"])
    snapshot_sizes = archive_data.snapshot_count_branches(visit["snapshot"])
    branches, releases, _ = process_snapshot_branches(snapshot)
    branch_info = random.choice(branches)
    revision_id = branch_info["revision"]

    directory = archive_data.revision_get(revision_id)["directory"]
    file_entries = [
        entry
        for entry in archive_data.directory_ls(directory)
        if entry["type"] == "file"
    ]
    directory_file = random.choice(file_entries)

    browse_url = reverse(
        "browse-content",
        url_args={"query_string": directory_file["checksums"]["sha1"]},
        query_params={
            "origin_url": origin_with_multiple_visits["url"],
            "snapshot": snapshot["id"],
            "branch": branch_info["name"],
            "path": directory_file["name"],
        },
    )

    resp = check_html_get_response(
        client, browse_url, status_code=200, template_used="browse/content.html"
    )

    _check_origin_snapshot_related_html(
        resp, origin_with_multiple_visits, snapshot, snapshot_sizes, branches, releases
    )
    assert_contains(resp, directory_file["name"])
    assert_contains(resp, f"Branch: <strong>{branch_info['name']}</strong>")

    # Qualifiers shared by the expected SWHIDs; key order matches the
    # dictionaries that were previously written out for each object
    origin_qualifiers = {
        "origin": origin_url,
        "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
    }
    anchored_qualifiers = {
        **origin_qualifiers,
        "anchor": gen_swhid(ObjectType.REVISION, revision_id),
    }

    expected_swhids = (
        (
            ObjectType.CONTENT,
            directory_file["checksums"]["sha1_git"],
            {**anchored_qualifiers, "path": f"/{directory_file['name']}"},
        ),
        (ObjectType.DIRECTORY, directory, anchored_qualifiers),
        (ObjectType.REVISION, revision_id, origin_qualifiers),
        (ObjectType.SNAPSHOT, snapshot["id"], {"origin": origin_url}),
    )

    for object_type, object_id, qualifiers in expected_swhids:
        # Each call gets its own copy of the qualifiers dictionary
        swhid = gen_swhid(object_type, object_id, metadata=dict(qualifiers))
        assert_contains(resp, swhid)
def _process_content_for_display(archive_data, content):
    """Compute the display data expected for a content in the content view.

    The raw bytes of the content are fetched from the archive, their mime type
    and encoding are detected, the bytes are re-encoded through
    ``_re_encode_content`` and the result is passed to
    ``prepare_content_for_display``.

    Args:
        archive_data: test archive accessor, used to fetch the content bytes
        content: dict describing the content; its ``sha1`` and ``path``
            entries are read

    Returns:
        The value returned by ``prepare_content_for_display``; its
        ``content_data`` entry is asserted to be a string.
    """
    content_data = archive_data.content_get_data(content["sha1"])

    mime_type, encoding = get_mimetype_and_encoding_for_content(content_data["data"])

    mime_type, encoding, content_data = _re_encode_content(
        mime_type, encoding, content_data["data"]
    )

    content_display = prepare_content_for_display(
        content_data, mime_type, content["path"]
    )

    # isinstance is the idiomatic type check and also accepts str subclasses
    assert isinstance(content_display["content_data"], str)

    return content_display
def test_content_dispaly_empty_query_string_with_snapshot(
    client, archive_data, origin_with_multiple_visits
):
    """Check the redirection when a content is browsed by snapshot and path only.

    When no checksum is given in the URL but ``snapshot`` and ``path`` query
    parameters are, the view must answer with a 302 redirecting to the content
    URL qualified with the ``sha1_git`` checksum of the targeted file, the
    query parameters being kept.
    """
    origin_url = origin_with_multiple_visits["url"]
    snapshot = archive_data.snapshot_get_latest(origin_url)
    head_revision = archive_data.revision_get(archive_data.snapshot_get_head(snapshot))

    # Pick a random file from the directory of the snapshot head revision
    candidate_files = [
        entry
        for entry in archive_data.directory_ls(head_revision["directory"])
        if entry["type"] == "file"
    ]
    dir_file = random.choice(candidate_files)

    query_params = {"snapshot": snapshot["id"], "path": dir_file["name"]}

    browse_url = reverse("browse-content", query_params=dict(query_params))
    resp = check_html_get_response(client, browse_url, status_code=302)

    redirect_url = reverse(
        "browse-content",
        url_args={"query_string": f"sha1_git:{dir_file['checksums']['sha1_git']}"},
        query_params=dict(query_params),
    )
    assert resp.url == redirect_url
url, status_code=404, template_used="error.html" ) assert re.search("Resource not found", resp.content.decode("utf-8")) def test_origin_content_view( client, archive_data, swh_scheduler, origin_with_multiple_visits ): origin_visits = archive_data.origin_visit_get(origin_with_multiple_visits["url"]) def _get_archive_data(visit_idx): snapshot = archive_data.snapshot_get(origin_visits[visit_idx]["snapshot"]) head_rev_id = archive_data.snapshot_get_head(snapshot) head_rev = archive_data.revision_get(head_rev_id) dir_content = archive_data.directory_ls(head_rev["directory"]) dir_files = [e for e in dir_content if e["type"] == "file"] dir_file = random.choice(dir_files) branches, releases, _ = process_snapshot_branches(snapshot) return { "branches": branches, "releases": releases, "root_dir_sha1": head_rev["directory"], "content": get_content(dir_file["checksums"]["sha1"]), "visit": origin_visits[visit_idx], "snapshot_sizes": archive_data.snapshot_count_branches(snapshot["id"]), } tdata = _get_archive_data(-1) _origin_content_view_test_helper( client, archive_data, origin_with_multiple_visits, origin_visits[-1], tdata["snapshot_sizes"], tdata["branches"], tdata["releases"], tdata["root_dir_sha1"], tdata["content"], ) _origin_content_view_test_helper( client, archive_data, origin_with_multiple_visits, origin_visits[-1], tdata["snapshot_sizes"], tdata["branches"], tdata["releases"], tdata["root_dir_sha1"], tdata["content"], timestamp=tdata["visit"]["date"], ) _origin_content_view_test_helper( client, archive_data, origin_with_multiple_visits, origin_visits[-1], tdata["snapshot_sizes"], tdata["branches"], tdata["releases"], tdata["root_dir_sha1"], tdata["content"], snapshot_id=tdata["visit"]["snapshot"], ) tdata = _get_archive_data(0) _origin_content_view_test_helper( client, archive_data, origin_with_multiple_visits, origin_visits[0], tdata["snapshot_sizes"], tdata["branches"], tdata["releases"], tdata["root_dir_sha1"], tdata["content"], visit_id=tdata["visit"]["visit"], ) 
def _origin_content_view_test_helper(
    client,
    archive_data,
    origin_info,
    origin_visit,
    snapshot_sizes,
    origin_branches,
    origin_releases,
    root_dir_sha1,
    content,
    visit_id=None,
    timestamp=None,
    snapshot_id=None,
):
    """Check the content view rendered in an origin context.

    The ``browse-content`` view is requested for ``content`` with origin
    related query parameters, then the response is checked for the highlighted
    content, the path breadcrumbs, the raw content link, the branches and
    releases navigation and the contextual SWHID of the content.

    Args:
        client: test client forwarded to ``check_html_get_response``
        archive_data: test archive accessor, used to get the visit snapshot
            and its head revision
        origin_info: dict describing the origin; its ``url`` entry is read
        origin_visit: dict describing the visit; its ``visit`` and
            ``snapshot`` entries are read
        snapshot_sizes: dict whose ``revision`` and ``release`` entries are
            the counters expected in the branches and releases links
        origin_branches: branches expected in the page, as dicts with a
            ``name`` entry
        origin_releases: releases expected in the page, as dicts with a
            ``name`` entry
        root_dir_sha1: identifier of the directory the first breadcrumb
            links to
        content: dict with ``path``, ``sha1_git``, ``data`` and
            ``hljs_language`` entries
        visit_id: optional visit identifier to put in the query parameters;
            defaults to the one of ``origin_visit`` when no snapshot
            identifier is given either
        timestamp: optional visit timestamp to put in the query parameters
        snapshot_id: optional snapshot identifier, only used when no visit
            identifier is set
    """
    # The first component of the stored path is dropped
    # (presumably the name of the root directory; confirm against get_content)
    content_path = "/".join(content["path"].split("/")[1:])

    if not visit_id and not snapshot_id:
        visit_id = origin_visit["visit"]

    query_params = {"origin_url": origin_info["url"], "path": content_path}

    if timestamp:
        query_params["timestamp"] = timestamp

    if visit_id:
        query_params["visit_id"] = visit_id
    elif snapshot_id:
        query_params["snapshot"] = snapshot_id

    url = reverse(
        "browse-content",
        url_args={"query_string": f"sha1_git:{content['sha1_git']}"},
        query_params=query_params,
    )

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    assert isinstance(content["data"], str)

    assert_contains(resp, '<code class="%s">' % content["hljs_language"])
    assert_contains(resp, escape(content["data"]))

    # Split on the last "/" only: removing the file name with str.replace
    # would also strip it from parent directories sharing that name
    # (e.g. "foo/foo"), yielding a wrong parent path.
    path, _, filename = content_path.rpartition("/")

    path_info = gen_path_info(path)

    del query_params["path"]

    if timestamp:
        # Breadcrumb links are expected to carry the normalized UTC timestamp
        query_params["timestamp"] = format_utc_iso_date(
            parse_iso8601_date_to_utc(timestamp).isoformat(), "%Y-%m-%dT%H:%M:%SZ"
        )

    root_dir_url = reverse(
        "browse-directory",
        url_args={"sha1_git": root_dir_sha1},
        query_params=query_params,
    )

    assert_contains(resp, '<li class="swh-path">', count=len(path_info) + 1)

    assert_contains(resp, '<a href="%s">%s</a>' % (root_dir_url, root_dir_sha1[:7]))

    for p in path_info:
        query_params["path"] = p["path"]
        dir_url = reverse("browse-origin-directory", query_params=query_params)
        assert_contains(resp, '<a href="%s">%s</a>' % (dir_url, p["name"]))

    assert_contains(resp, "<li>%s</li>" % filename)

    query_string = "sha1_git:" + content["sha1_git"]

    url_raw = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params={"filename": filename},
    )
    assert_contains(resp, url_raw)

    # "path" is only set again when the breadcrumb loop above ran
    if "path" in query_params:
        del query_params["path"]

    origin_branches_url = reverse("browse-origin-branches", query_params=query_params)

    assert_contains(resp, f'href="{escape(origin_branches_url)}"')
    assert_contains(resp, f"Branches ({snapshot_sizes['revision']})")

    origin_releases_url = reverse("browse-origin-releases", query_params=query_params)

    assert_contains(resp, f'href="{escape(origin_releases_url)}">')
    assert_contains(resp, f"Releases ({snapshot_sizes['release']})")

    assert_contains(resp, '<li class="swh-branch">', count=len(origin_branches))

    query_params["path"] = content_path

    for branch in origin_branches:
        root_dir_branch_url = reverse(
            "browse-origin-content",
            query_params={"branch": branch["name"], **query_params},
        )

        assert_contains(resp, '<a href="%s">' % root_dir_branch_url)

    assert_contains(resp, '<li class="swh-release">', count=len(origin_releases))

    query_params["branch"] = None
    for release in origin_releases:
        root_dir_release_url = reverse(
            "browse-origin-content",
            query_params={"release": release["name"], **query_params},
        )

        assert_contains(resp, '<a href="%s">' % root_dir_release_url)

    # Second request, with the query parameters as left by the checks above
    url = reverse(
        "browse-content",
        url_args={"query_string": query_string},
        query_params=query_params,
    )

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    snapshot = archive_data.snapshot_get(origin_visit["snapshot"])
    head_rev_id = archive_data.snapshot_get_head(snapshot)

    swhid_context = {
        "origin": origin_info["url"],
        "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
        "anchor": gen_swhid(ObjectType.REVISION, head_rev_id),
        "path": f"/{content_path}",
    }

    swh_cnt_id = gen_swhid(
        ObjectType.CONTENT, content["sha1_git"], metadata=swhid_context
    )
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id)
    assert_contains(resp, swh_cnt_id_url)

    assert_contains(resp, "swh-take-new-snapshot")

    _check_origin_link(resp, origin_info["url"])

    assert_not_contains(resp, "swh-metadata-popover")
def test_sub_directory_view(client, archive_data, directory_with_subdirs):
    """Check the directory view for a randomly picked sub-directory.

    One "dir" entry of ``directory_with_subdirs`` is drawn at random and the
    view is checked with the root directory identifier, the entries of the
    picked sub-directory and its name as browsed path.
    """
    subdir_entries = [
        entry
        for entry in archive_data.directory_ls(directory_with_subdirs)
        if entry["type"] == "dir"
    ]
    picked_subdir = random.choice(subdir_entries)

    _directory_view_checks(
        client,
        directory_with_subdirs,
        archive_data.directory_ls(picked_subdir["target"]),
        path=picked_subdir["name"],
    )
def test_directory_request_errors(client, invalid_sha1, unknown_directory):
    """Check the error pages returned by the directory view.

    An invalid identifier must produce a 400 response and an unknown
    directory a 404 response, both rendered with the ``error.html`` template.
    """
    # (directory identifier, expected HTTP status)
    error_cases = ((invalid_sha1, 400), (unknown_directory, 404))

    for sha1_git, expected_status in error_cases:
        check_html_get_response(
            client,
            reverse("browse-directory", url_args={"sha1_git": sha1_git}),
            status_code=expected_status,
            template_used="error.html",
        )
def test_directory_with_invalid_path(client, directory):
    """Check the directory view when the browsed path does not exist.

    Browsing ``foo/bar`` from ``directory`` must produce a 404 response
    rendered with ``browse/directory.html`` and containing a message naming
    both the path and the root directory.
    """
    path = "foo/bar"

    resp = check_html_get_response(
        client,
        reverse(
            "browse-directory",
            url_args={"sha1_git": directory},
            query_params={"path": path},
        ),
        status_code=404,
        template_used="browse/directory.html",
    )

    assert_contains(
        resp,
        f"Directory entry with path {path} from root directory {directory} not found",
        status_code=404,
    )
def test_drectory_origin_snapshot_release_browse(
    client, archive_data, origin_with_multiple_visits
):
    """Browse a sub-directory of a randomly picked release in a snapshot context.

    A visit of the origin, one release of its snapshot and one sub-directory
    of the release directory are drawn at random. The directory view must
    render the snapshot navigation, the sub-directory and release names, and
    the contextual SWHIDs of the directory, revision, release and snapshot.
    """
    origin_url = origin_with_multiple_visits["url"]

    # Random draws happen in the same order as before: visit, release, sub-directory
    visit = random.choice(archive_data.origin_visit_get(origin_url))
    snapshot = archive_data.snapshot_get(visit["snapshot"])
    snapshot_sizes = archive_data.snapshot_count_branches(visit["snapshot"])
    branches, releases, _ = process_snapshot_branches(snapshot)
    release_info = random.choice(releases)

    directory = release_info["directory"]
    subdir_entries = [
        entry
        for entry in archive_data.directory_ls(directory)
        if entry["type"] == "dir"
    ]
    directory_subdir = random.choice(subdir_entries)

    browse_url = reverse(
        "browse-directory",
        url_args={"sha1_git": directory},
        query_params={
            "origin_url": origin_url,
            "snapshot": snapshot["id"],
            "release": release_info["name"],
            "path": directory_subdir["name"],
        },
    )

    resp = check_html_get_response(
        client, browse_url, status_code=200, template_used="browse/directory.html"
    )

    _check_origin_snapshot_related_html(
        resp, origin_with_multiple_visits, snapshot, snapshot_sizes, branches, releases
    )
    assert_contains(resp, directory_subdir["name"])
    assert_contains(resp, f"Release: <strong>{release_info['name']}</strong>")

    # Qualifiers shared by the expected SWHIDs; key order matches the
    # dictionaries that were previously written out for each object
    origin_qualifiers = {
        "origin": origin_url,
        "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
    }

    expected_swhids = (
        (
            ObjectType.DIRECTORY,
            directory_subdir["target"],
            {
                **origin_qualifiers,
                "anchor": gen_swhid(ObjectType.RELEASE, release_info["id"]),
                "path": "/",
            },
        ),
        (ObjectType.REVISION, release_info["target"], origin_qualifiers),
        (ObjectType.RELEASE, release_info["id"], origin_qualifiers),
        (ObjectType.SNAPSHOT, snapshot["id"], {"origin": origin_url}),
    )

    for object_type, object_id, qualifiers in expected_swhids:
        # Each call gets its own copy of the qualifiers dictionary
        swhid = gen_swhid(object_type, object_id, metadata=dict(qualifiers))
        assert_contains(resp, swhid)
def _directory_view_checks(
    client,
    root_directory_sha1,
    directory_entries,
    path=None,
    origin_url=None,
    snapshot_id=None,
    revision_id=None,
):
    """Check the directory view rendered for a directory or one of its sub-paths.

    The ``browse-directory`` view is requested for ``root_directory_sha1`` and
    ``path``, then the response is checked for the links to the listed
    directories and files, the path breadcrumbs, the vault cooking widget and
    the contextual SWHID of the displayed directory.

    Args:
        client: test client forwarded to ``check_html_get_response``
        root_directory_sha1: identifier of the directory the browsing
            started from
        directory_entries: entries expected in the page, as dicts with
            ``type``, ``name``, ``target`` and ``dir_id`` entries; the
            ``dir_id`` of the first one identifies the displayed directory,
            so the list must not be empty
        path: optional path of the displayed directory, relative to the
            root directory
        origin_url: optional origin URL put in the query parameters and in
            the SWHID context
        snapshot_id: optional snapshot identifier put in the query
            parameters and in the SWHID context
        revision_id: optional revision identifier, used as SWHID anchor when
            provided
    """
    # "rev" entries are listed along with the directories in the page
    dirs = [e for e in directory_entries if e["type"] in ("dir", "rev")]
    files = [e for e in directory_entries if e["type"] == "file"]

    url_args = {"sha1_git": root_directory_sha1}
    query_params = {"origin_url": origin_url, "snapshot": snapshot_id}

    url = reverse(
        "browse-directory",
        url_args=url_args,
        query_params={**query_params, "path": path},
    )

    root_dir_url = reverse(
        "browse-directory", url_args=url_args, query_params=query_params,
    )

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/directory.html"
    )

    # Root breadcrumb link (it was previously asserted twice with the same string)
    assert_contains(
        resp, '<a href="%s">%s</a>' % (root_dir_url, root_directory_sha1[:7])
    )
    assert_contains(resp, '<td class="swh-directory">', count=len(dirs))
    assert_contains(resp, '<td class="swh-content">', count=len(files))

    for d in dirs:
        if d["type"] == "rev":
            dir_url = reverse("browse-revision", url_args={"sha1_git": d["target"]})
        else:
            dir_path = d["name"]
            if path:
                dir_path = "%s/%s" % (path, d["name"])
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_directory_sha1},
                query_params={**query_params, "path": dir_path},
            )
        assert_contains(resp, dir_url)

    for f in files:
        # File paths in content links are prefixed by the root directory id
        file_path = "%s/%s" % (root_directory_sha1, f["name"])
        if path:
            file_path = "%s/%s/%s" % (root_directory_sha1, path, f["name"])
        query_string = "sha1_git:" + f["target"]
        file_url = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={**query_params, "path": file_path},
        )
        assert_contains(resp, file_url)

    path_info = gen_path_info(path)

    assert_contains(resp, '<li class="swh-path">', count=len(path_info) + 1)

    for p in path_info:
        dir_url = reverse(
            "browse-directory",
            url_args={"sha1_git": root_directory_sha1},
            query_params={**query_params, "path": p["path"]},
        )
        assert_contains(resp, '<a href="%s">%s</a>' % (dir_url, p["name"]))

    assert_contains(resp, "vault-cook-directory")

    displayed_dir_id = directory_entries[0]["dir_id"]

    swhid_context = {}
    if origin_url:
        swhid_context["origin"] = origin_url
    if snapshot_id:
        swhid_context["visit"] = gen_swhid(ObjectType.SNAPSHOT, snapshot_id)
    # Anchor on the root directory when a sub-directory is displayed;
    # a revision, when provided, takes precedence as anchor
    if root_directory_sha1 != displayed_dir_id:
        swhid_context["anchor"] = gen_swhid(ObjectType.DIRECTORY, root_directory_sha1)
    if revision_id:
        swhid_context["anchor"] = gen_swhid(ObjectType.REVISION, revision_id)

    swhid_context["path"] = f"/{path}/" if path else None

    swh_dir_id = gen_swhid(
        ObjectType.DIRECTORY, displayed_dir_id, metadata=swhid_context
    )
    swh_dir_id_url = reverse("browse-swhid", url_args={"swhid": swh_dir_id})
    assert_contains(resp, swh_dir_id)
    assert_contains(resp, swh_dir_id_url)

    assert_not_contains(resp, "swh-metadata-popover")