Page Menu | Home | Software Heritage

No OneTemporary

diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py
index a338a19f..a13f868d 100644
--- a/swh/web/browse/views/content.py
+++ b/swh/web/browse/views/content.py
@@ -1,401 +1,404 @@
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import difflib
from distutils.util import strtobool
import sentry_sdk
from django.http import HttpResponse, JsonResponse
from django.shortcuts import render
from swh.model.hashutil import hash_to_hex
from swh.model.swhids import ObjectType
from swh.web.browse.browseurls import browse_route
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.browse.utils import (
content_display_max_size,
gen_link,
prepare_content_for_display,
request_content,
)
from swh.web.common import archive, highlightjs, query
from swh.web.common.exc import NotFoundExc, http_status_code_message
from swh.web.common.identifiers import get_swhids_info
from swh.web.common.typing import ContentMetadata, SWHObjectInfo
from swh.web.common.utils import gen_path_info, reverse, swh_object_icons
@browse_route(
    r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/raw/",
    view_name="browse-content-raw",
    checksum_args=["query_string"],
)
def content_raw(request, query_string):
    """Django view that produces a raw display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/raw/`
    """
    re_encode = bool(strtobool(request.GET.get("re_encode", "false")))
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    content_data = request_content(query_string, max_size=None, re_encode=re_encode)

    # fall back to "<algo>_<hash>" when no explicit filename was requested
    filename = request.GET.get("filename", None) or "%s_%s" % (algo, checksum)

    mimetype = content_data["mimetype"]
    is_textual = mimetype.startswith("text/") or mimetype == "inode/x-empty"
    if is_textual:
        # textual content is served inline as plain text
        response = HttpResponse(content_data["raw_data"], content_type="text/plain")
        response["Content-disposition"] = "filename=%s" % filename
    else:
        # binary content is served as a download attachment
        response = HttpResponse(
            content_data["raw_data"], content_type="application/octet-stream"
        )
        response["Content-disposition"] = "attachment; filename=%s" % filename
    return response
# Diffs whose size (in bytes) exceeds this limit are only computed on demand
_auto_diff_size_limit = 20000


@browse_route(
    r"content/(?P<from_query_string>.*)/diff/(?P<to_query_string>.*)",
    view_name="diff-contents",
)
def _contents_diff(request, from_query_string, to_query_string):
    """
    Browse endpoint used to compute unified diffs between two contents.

    Diffs are generated only if the two contents are textual.
    By default, diffs whose size are greater than 20 kB will
    not be generated. To force the generation of large diffs,
    the 'force' boolean query parameter must be used.

    Args:
        request: input django http request
        from_query_string: a string of the form "[ALGO_HASH:]HASH" where
            optional ALGO_HASH can be either ``sha1``, ``sha1_git``,
            ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH
            the hexadecimal representation of the hash value identifying
            the first content
        to_query_string: same as above for identifying the second content

    Returns:
        A JSON object containing the unified diff.
    """
    language = "nohighlight"
    force = bool(strtobool(request.GET.get("force", "false")))
    path = request.GET.get("path", None)

    if from_query_string == to_query_string:
        diff_str = "File renamed without changes"
    else:
        try:
            text_diff = True
            content_from = None
            content_to = None
            from_size = 0
            to_size = 0
            from_lines = []
            to_lines = []

            if from_query_string:
                content_from = request_content(from_query_string, max_size=None)
                display_data = prepare_content_for_display(
                    content_from["raw_data"], content_from["mimetype"], path
                )
                language = display_data["language"]
                from_size = content_from["length"]
                mimetype = content_from["mimetype"]
                text_diff = mimetype.startswith("text/") or mimetype == "inode/x-empty"

            # only fetch the second content if the first one was textual
            if text_diff and to_query_string:
                content_to = request_content(to_query_string, max_size=None)
                display_data = prepare_content_for_display(
                    content_to["raw_data"], content_to["mimetype"], path
                )
                language = display_data["language"]
                to_size = content_to["length"]
                mimetype = content_to["mimetype"]
                text_diff = mimetype.startswith("text/") or mimetype == "inode/x-empty"

            # NOTE: this is the difference of the content sizes, used as a
            # cheap proxy for the diff size
            diff_size = abs(to_size - from_size)

            if not text_diff:
                diff_str = "Diffs are not generated for non textual content"
                language = "nohighlight"
            elif not force and diff_size > _auto_diff_size_limit:
                diff_str = "Large diffs are not automatically computed"
                language = "nohighlight"
            else:
                if content_from:
                    from_lines = (
                        content_from["raw_data"].decode("utf-8").splitlines(True)
                    )
                    if from_lines and from_lines[-1][-1] != "\n":
                        # flag a missing trailing newline so the UI can show it
                        from_lines[-1] += "[swh-no-nl-marker]\n"
                if content_to:
                    to_lines = content_to["raw_data"].decode("utf-8").splitlines(True)
                    if to_lines and to_lines[-1][-1] != "\n":
                        to_lines[-1] += "[swh-no-nl-marker]\n"
                # drop the two "---"/"+++" header lines of the unified diff
                diff_lines = difflib.unified_diff(from_lines, to_lines)
                diff_str = "".join(list(diff_lines)[2:])
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
            diff_str = str(exc)

    return JsonResponse({"diff_str": diff_str, "language": language})
@browse_route(
    r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/",
    view_name="browse-content",
    checksum_args=["query_string"],
)
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/`
    """
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    origin_url = request.GET.get("origin_url")
    selected_language = request.GET.get("language")
    if not origin_url:
        # legacy query parameter name, kept for backward compatibility
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    content_data = {}
    error_info = {"status_code": 200, "description": None}
    try:
        content_data = request_content(query_string)
    except NotFoundExc as e:
        # the page is still rendered, with a 404 status and an error banner
        error_info["status_code"] = 404
        error_info["description"] = f"NotFoundExc: {str(e)}"
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                browse_context="content",
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                # the content exists but the requested origin is broken:
                # point the user to the origin-less browse URL
                raw_cnt_url = reverse(
                    "browse-content", url_args={"query_string": query_string}
                )
                error_message = (
                    "The Software Heritage archive has a content "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the content "
                    "without origin information: %s"
                    % (gen_link(origin_url), gen_link(raw_cnt_url))
                )
                raise NotFoundExc(error_message)
            else:
                raise e
    content = None
    language = None
    mimetype = None
    if content_data.get("raw_data") is not None:
        content_display_data = prepare_content_for_display(
            content_data["raw_data"], content_data["mimetype"], path
        )
        content = content_display_data["content_data"]
        language = content_display_data["language"]
        mimetype = content_display_data["mimetype"]
    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language
    available_languages = None
    if mimetype and "text/" in mimetype:
        available_languages = highlightjs.get_supported_languages()
    filename = None
    path_info = None
    directory_id = None
    root_dir = None
    if snapshot_context:
        root_dir = snapshot_context.get("root_directory")
    query_params = snapshot_context["query_params"] if snapshot_context else {}
    breadcrumbs = []
    if path:
        split_path = path.split("/")
        # assumes path is prefixed by the root directory id when no snapshot
        # context is available -- TODO confirm against callers
        root_dir = root_dir or split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            # strip root dir and filename to keep only intermediate dirs
            path = path.replace(root_dir + "/", "")
            path = path[: -len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": dir_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": dir_url})
        # the browsed file itself is the last, unlinked, breadcrumb
        breadcrumbs.append({"name": filename, "url": None})
    if path and root_dir != path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    elif root_dir != path:
        directory_id = root_dir
    else:
        root_dir = None
    query_params = {"filename": filename}
    content_checksums = content_data.get("checksums", {})
    content_url = reverse(
        "browse-content", url_args={"query_string": query_string},
    )
    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )
    content_metadata = ContentMetadata(
        object_type=ObjectType.CONTENT,
        object_id=content_checksums.get("sha1_git"),
        sha1=content_checksums.get("sha1"),
        sha1_git=content_checksums.get("sha1_git"),
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=content_data.get("length", 0),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )
    swh_objects = []
    # only generate a content SWHID when the content was actually found
    if content_checksums:
        swh_objects.append(
            SWHObjectInfo(
                object_type=ObjectType.CONTENT,
                object_id=content_checksums.get("sha1_git"),
            )
        )
    if directory_id:
        swh_objects.append(
            SWHObjectInfo(object_type=ObjectType.DIRECTORY, object_id=directory_id)
        )
    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(
                object_type=ObjectType.REVISION,
                object_id=snapshot_context["revision_id"],
            )
        )
        swh_objects.append(
            SWHObjectInfo(
                object_type=ObjectType.SNAPSHOT,
                object_id=snapshot_context["snapshot_id"],
            )
        )
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=ObjectType.RELEASE,
                    object_id=snapshot_context["release_id"],
                )
            )
    swhids_info = get_swhids_info(
        swh_objects, snapshot_context, extra_context=content_metadata,
    )
    heading = "Content - %s" % content_checksums.get("sha1_git")
    if breadcrumbs:
        content_path = "/".join([bc["name"] for bc in breadcrumbs])
        heading += " - %s" % content_path
    return render(
        request,
        "browse/content.html",
        {
            "heading": heading,
            # swhids_info may be empty when the content was not found
            "swh_object_id": swhids_info[0]["swhid"] if swhids_info else "",
            "swh_object_name": "Content",
            "swh_object_metadata": content_metadata,
            "content": content,
            "content_size": content_data.get("length"),
            "max_content_size": content_display_max_size,
            "filename": filename,
            "encoding": content_data.get("encoding"),
            "mimetype": mimetype,
            "language": language,
            "available_languages": available_languages,
            "breadcrumbs": breadcrumbs,
            "top_right_link": {
                "url": content_raw_url,
                "icon": swh_object_icons["content"],
                "text": "Raw File",
            },
            "snapshot_context": snapshot_context,
            "vault_cooking": None,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py
index 60583cb5..1186b7c1 100644
--- a/swh/web/tests/browse/views/test_content.py
+++ b/swh/web/tests/browse/views/test_content.py
@@ -1,623 +1,633 @@
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import random
+import pytest
+
from django.utils.html import escape
from swh.model.swhids import ObjectType
from swh.web.browse.snapshot_context import process_snapshot_branches
from swh.web.browse.utils import (
_re_encode_content,
get_mimetype_and_encoding_for_content,
prepare_content_for_display,
)
from swh.web.common.exc import NotFoundExc
from swh.web.common.identifiers import gen_swhid
from swh.web.common.utils import gen_path_info, reverse
from swh.web.tests.django_asserts import assert_contains, assert_not_contains
from swh.web.tests.utils import check_html_get_response, check_http_get_response
def test_content_view_text(client, archive_data, content_text):
    """Browsing a textual content renders it, links to the raw view and
    embeds its SWHID."""
    sha1_git = content_text["sha1_git"]
    url = reverse(
        "browse-content",
        url_args={"query_string": content_text["sha1"]},
        query_params={"path": content_text["path"]},
    )
    url_raw = reverse(
        "browse-content-raw", url_args={"query_string": content_text["sha1"]}
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    content_display = _process_content_for_display(archive_data, content_text)
    mimetype = content_display["mimetype"]
    if mimetype.startswith("text/"):
        # highlighted contents are wrapped in a <code> tag whose CSS class
        # is the detected language
        assert_contains(resp, '<code class="%s">' % content_display["language"])
        assert_contains(resp, escape(content_display["content_data"]))
    assert_contains(resp, url_raw)
    swh_cnt_id = gen_swhid(ObjectType.CONTENT, sha1_git)
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id)
    assert_contains(resp, swh_cnt_id_url)
    assert_not_contains(resp, "swh-metadata-popover")
def test_content_view_no_highlight(
    client, archive_data, content_application_no_highlight, content_text_no_highlight
):
    """Contents with no highlighting support must be rendered with the
    "nohighlight" code class."""
    for content_ in (content_application_no_highlight, content_text_no_highlight):
        content = content_
        sha1_git = content["sha1_git"]
        url = reverse("browse-content", url_args={"query_string": content["sha1"]})
        url_raw = reverse(
            "browse-content-raw", url_args={"query_string": content["sha1"]}
        )
        resp = check_html_get_response(
            client, url, status_code=200, template_used="browse/content.html"
        )
        content_display = _process_content_for_display(archive_data, content)
        assert_contains(resp, '<code class="nohighlight">')
        assert_contains(resp, escape(content_display["content_data"]))
        assert_contains(resp, url_raw)
        swh_cnt_id = gen_swhid(ObjectType.CONTENT, sha1_git)
        swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
        assert_contains(resp, swh_cnt_id)
        assert_contains(resp, swh_cnt_id_url)
def test_content_view_no_utf8_text(client, archive_data, content_text_non_utf8):
    """A non UTF-8 textual content must still be browsable, after
    re-encoding."""
    sha1_git = content_text_non_utf8["sha1_git"]
    url = reverse(
        "browse-content", url_args={"query_string": content_text_non_utf8["sha1"]}
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    content_display = _process_content_for_display(archive_data, content_text_non_utf8)
    swh_cnt_id = gen_swhid(ObjectType.CONTENT, sha1_git)
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id_url)
    assert_contains(resp, escape(content_display["content_data"]))
def test_content_view_image(client, archive_data, content_image_type):
    """An image content is rendered inline as a base64 data URI."""
    url = reverse(
        "browse-content", url_args={"query_string": content_image_type["sha1"]}
    )
    url_raw = reverse(
        "browse-content-raw", url_args={"query_string": content_image_type["sha1"]}
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    content_display = _process_content_for_display(archive_data, content_image_type)
    mimetype = content_display["mimetype"]
    content_data = content_display["content_data"]
    assert_contains(resp, '<img src="data:%s;base64,%s"/>' % (mimetype, content_data))
    assert_contains(resp, url_raw)
def test_content_view_image_no_rendering(
    client, archive_data, content_unsupported_image_type_rendering
):
    """Image types the browser cannot render must show an explanatory
    message instead of an <img> tag."""
    url = reverse(
        "browse-content",
        url_args={"query_string": content_unsupported_image_type_rendering["sha1"]},
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    mimetype = content_unsupported_image_type_rendering["mimetype"]
    encoding = content_unsupported_image_type_rendering["encoding"]
    assert_contains(
        resp,
        (
            f"Content with mime type {mimetype} and encoding {encoding} "
            "cannot be displayed."
        ),
    )
def test_content_view_text_with_path(client, archive_data, content_text):
    """Browsing a content with a ``path`` query parameter displays
    breadcrumbs and a qualified SWHID anchored to the root directory;
    an invalid path yields a 404."""
    path = content_text["path"]
    url = reverse(
        "browse-content",
        url_args={"query_string": content_text["sha1"]},
        query_params={"path": path},
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    assert_contains(resp, '<nav class="bread-crumbs')
    content_display = _process_content_for_display(archive_data, content_text)
    mimetype = content_display["mimetype"]
    if mimetype.startswith("text/"):
        hljs_language = content_text["hljs_language"]
        assert_contains(resp, '<code class="%s">' % hljs_language)
        assert_contains(resp, escape(content_display["content_data"]))
    split_path = path.split("/")
    root_dir_sha1 = split_path[0]
    filename = split_path[-1]
    # keep only the intermediate directories of the path
    path = path.replace(root_dir_sha1 + "/", "").replace(filename, "")
    swhid_context = {
        "anchor": gen_swhid(ObjectType.DIRECTORY, root_dir_sha1),
        # fixed: expected qualified path is "/<dirs>/<filename>"; the
        # previous expected value contained a stray "(unknown)" artifact
        "path": f"/{path}{filename}",
    }
    swh_cnt_id = gen_swhid(
        ObjectType.CONTENT, content_text["sha1_git"], metadata=swhid_context
    )
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id)
    assert_contains(resp, swh_cnt_id_url)
    path_info = gen_path_info(path)
    root_dir_url = reverse("browse-directory", url_args={"sha1_git": root_dir_sha1})
    # one breadcrumb per intermediate dir, plus one for the root dir
    assert_contains(resp, '<li class="swh-path">', count=len(path_info) + 1)
    assert_contains(
        resp, '<a href="' + root_dir_url + '">' + root_dir_sha1[:7] + "</a>"
    )
    for p in path_info:
        dir_url = reverse(
            "browse-directory",
            url_args={"sha1_git": root_dir_sha1},
            query_params={"path": p["path"]},
        )
        assert_contains(resp, '<a href="' + dir_url + '">' + p["name"] + "</a>")
    assert_contains(resp, "<li>" + filename + "</li>")
    url_raw = reverse(
        "browse-content-raw",
        url_args={"query_string": content_text["sha1"]},
        query_params={"filename": filename},
    )
    assert_contains(resp, url_raw)
    # a bare filename path yields no breadcrumbs
    url = reverse(
        "browse-content",
        url_args={"query_string": content_text["sha1"]},
        query_params={"path": filename},
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    assert_not_contains(resp, '<nav class="bread-crumbs')
    # a path that does not exist in the directory yields a 404
    invalid_path = "%s/foo/bar/baz" % root_dir_sha1
    url = reverse(
        "browse-content",
        url_args={"query_string": content_text["sha1"]},
        query_params={"path": invalid_path},
    )
    resp = check_html_get_response(
        client, url, status_code=404, template_used="error.html"
    )
def test_content_raw_text(client, archive_data, content_text):
    """The raw endpoint serves textual content inline, with either the
    default "<algo>_<hash>" filename or an explicit one."""
    url = reverse("browse-content-raw", url_args={"query_string": content_text["sha1"]})
    resp = check_http_get_response(
        client, url, status_code=200, content_type="text/plain"
    )
    content_data = archive_data.content_get_data(content_text["sha1"])["data"]
    assert resp["Content-Type"] == "text/plain"
    assert resp["Content-disposition"] == (
        "filename=%s_%s" % ("sha1", content_text["sha1"])
    )
    assert resp.content == content_data
    filename = content_text["path"].split("/")[-1]
    url = reverse(
        "browse-content-raw",
        url_args={"query_string": content_text["sha1"]},
        query_params={"filename": filename},
    )
    resp = check_http_get_response(
        client, url, status_code=200, content_type="text/plain"
    )
    assert resp["Content-Type"] == "text/plain"
    assert resp["Content-disposition"] == "filename=%s" % filename
    assert resp.content == content_data
def test_content_raw_no_utf8_text(client, content_text_non_utf8):
    """The raw endpoint must preserve the original (non UTF-8) encoding of
    a textual content."""
    raw_url = reverse(
        "browse-content-raw", url_args={"query_string": content_text_non_utf8["sha1"]}
    )
    response = check_http_get_response(
        client, raw_url, status_code=200, content_type="text/plain"
    )
    _, detected_encoding = get_mimetype_and_encoding_for_content(response.content)
    assert detected_encoding == content_text_non_utf8["encoding"]
def test_content_raw_bin(client, archive_data, content_image_type):
    """The raw endpoint serves binary content as a download attachment,
    with either the default or an explicit filename."""
    url = reverse(
        "browse-content-raw", url_args={"query_string": content_image_type["sha1"]}
    )
    resp = check_http_get_response(
        client, url, status_code=200, content_type="application/octet-stream"
    )
    filename = content_image_type["path"].split("/")[-1]
    content_data = archive_data.content_get_data(content_image_type["sha1"])["data"]
    assert resp["Content-Type"] == "application/octet-stream"
    assert resp["Content-disposition"] == "attachment; filename=%s_%s" % (
        "sha1",
        content_image_type["sha1"],
    )
    assert resp.content == content_data
    url = reverse(
        "browse-content-raw",
        url_args={"query_string": content_image_type["sha1"]},
        query_params={"filename": filename},
    )
    resp = check_http_get_response(
        client, url, status_code=200, content_type="application/octet-stream"
    )
    assert resp["Content-Type"] == "application/octet-stream"
    assert resp["Content-disposition"] == "attachment; filename=%s" % filename
    assert resp.content == content_data
@pytest.mark.django_db
@pytest.mark.parametrize("staff_user_logged_in", [False, True])
def test_content_request_errors(
    client, staff_user, invalid_sha1, unknown_content, staff_user_logged_in
):
    """Requesting an invalid or unknown content hash must produce the
    expected error pages, for both anonymous and staff users."""
    if staff_user_logged_in:
        client.force_login(staff_user)

    # malformed hash: 400 on the generic error page
    url = reverse("browse-content", url_args={"query_string": invalid_sha1})
    check_html_get_response(client, url, status_code=400, template_used="error.html")

    # well-formed but unknown hash: 404 rendered on the content page itself
    url = reverse("browse-content", url_args={"query_string": unknown_content["sha1"]})
    check_html_get_response(
        client, url, status_code=404, template_used="browse/content.html"
    )
def test_content_bytes_missing(client, archive_data, mocker, content):
    """A content known to the archive but whose bytes are missing must be
    rendered with a 404 status."""
    mock_archive = mocker.patch("swh.web.browse.utils.archive")
    content_data = archive_data.content_get(content["sha1"])
    mock_archive.lookup_content.return_value = content_data
    mock_archive.lookup_content_filetype.side_effect = Exception()
    mock_archive.lookup_content_raw.side_effect = NotFoundExc(
        "Content bytes not available!"
    )
    url = reverse("browse-content", url_args={"query_string": content["sha1"]})
    check_html_get_response(
        client, url, status_code=404, template_used="browse/content.html"
    )
def test_content_too_large(client, mocker):
    """An over-sized content is not displayed inline; the page shows a
    notice and links to the raw download instead."""
    mock_request_content = mocker.patch("swh.web.browse.views.content.request_content")
    # stub content metadata with raw_data=None, as returned for contents
    # exceeding the display size limit
    stub_content_too_large_data = {
        "checksums": {
            "sha1": "8624bcdae55baeef00cd11d5dfcfa60f68710a02",
            "sha1_git": "94a9ed024d3859793618152ea559a168bbcbb5e2",
            "sha256": (
                "8ceb4b9ee5adedde47b31e975c1d90c73ad27b6b16" "5a1dcd80c7c545eb65b903"
            ),
            "blake2s256": (
                "38702b7168c7785bfe748b51b45d9856070ba90" "f9dc6d90f2ea75d4356411ffe"
            ),
        },
        "length": 30000000,
        "raw_data": None,
        "mimetype": "text/plain",
        "encoding": "us-ascii",
        "language": "not detected",
        "licenses": "GPL",
        "error_code": 200,
        "error_message": "",
        "error_description": "",
    }
    content_sha1 = stub_content_too_large_data["checksums"]["sha1"]
    mock_request_content.return_value = stub_content_too_large_data
    url = reverse("browse-content", url_args={"query_string": content_sha1})
    url_raw = reverse("browse-content-raw", url_args={"query_string": content_sha1})
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    assert_contains(resp, "Content is too large to be displayed")
    assert_contains(resp, url_raw)
def test_content_uppercase(client, content):
    """An uppercase checksum query string redirects to its lowercase
    canonical URL."""
    uppercase_url = reverse(
        "browse-content-uppercase-checksum",
        url_args={"query_string": content["sha1"].upper()},
    )
    response = check_html_get_response(client, uppercase_url, status_code=302)
    expected_location = reverse(
        "browse-content", url_args={"query_string": content["sha1"]}
    )
    assert response["location"] == expected_location
def test_content_utf8_detected_as_binary_display(
    client, archive_data, content_utf8_detected_as_binary
):
    """UTF-8 content misdetected as binary must still be displayed as
    text after re-encoding."""
    url = reverse(
        "browse-content",
        url_args={"query_string": content_utf8_detected_as_binary["sha1"]},
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    content_display = _process_content_for_display(
        archive_data, content_utf8_detected_as_binary
    )
    assert_contains(resp, escape(content_display["content_data"]))
def test_content_origin_snapshot_branch_browse(
    client, archive_data, origin_with_multiple_visits
):
    """Browsing a content in an origin/snapshot/branch context must embed
    origin navigation and fully qualified SWHIDs."""
    origin_url = origin_with_multiple_visits["url"]
    visits = archive_data.origin_visit_get(origin_url)
    # pick a random visit, branch and file to exercise varied inputs
    visit = random.choice(visits)
    snapshot = archive_data.snapshot_get(visit["snapshot"])
    snapshot_sizes = archive_data.snapshot_count_branches(visit["snapshot"])
    branches, releases, _ = process_snapshot_branches(snapshot)
    branch_info = random.choice(branches)
    directory = archive_data.revision_get(branch_info["revision"])["directory"]
    directory_content = archive_data.directory_ls(directory)
    directory_file = random.choice(
        [e for e in directory_content if e["type"] == "file"]
    )
    url = reverse(
        "browse-content",
        url_args={"query_string": directory_file["checksums"]["sha1"]},
        query_params={
            "origin_url": origin_with_multiple_visits["url"],
            "snapshot": snapshot["id"],
            "branch": branch_info["name"],
            "path": directory_file["name"],
        },
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    _check_origin_snapshot_related_html(
        resp, origin_with_multiple_visits, snapshot, snapshot_sizes, branches, releases
    )
    assert_contains(resp, directory_file["name"])
    assert_contains(resp, f"Branch: <strong>{branch_info['name']}</strong>")
    # content SWHID is qualified with origin, visit, anchor revision and path
    cnt_swhid = gen_swhid(
        ObjectType.CONTENT,
        directory_file["checksums"]["sha1_git"],
        metadata={
            "origin": origin_url,
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.REVISION, branch_info["revision"]),
            "path": f"/{directory_file['name']}",
        },
    )
    assert_contains(resp, cnt_swhid)
    dir_swhid = gen_swhid(
        ObjectType.DIRECTORY,
        directory,
        metadata={
            "origin": origin_url,
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.REVISION, branch_info["revision"]),
        },
    )
    assert_contains(resp, dir_swhid)
    rev_swhid = gen_swhid(
        ObjectType.REVISION,
        branch_info["revision"],
        metadata={
            "origin": origin_url,
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
        },
    )
    assert_contains(resp, rev_swhid)
    snp_swhid = gen_swhid(
        ObjectType.SNAPSHOT, snapshot["id"], metadata={"origin": origin_url,},
    )
    assert_contains(resp, snp_swhid)
def test_content_origin_snapshot_release_browse(
    client, archive_data, origin_with_multiple_visits
):
    """Browsing a content in an origin/snapshot/release context must embed
    origin navigation and SWHIDs anchored to the release."""
    origin_url = origin_with_multiple_visits["url"]
    visits = archive_data.origin_visit_get(origin_url)
    # pick a random visit, release and file to exercise varied inputs
    visit = random.choice(visits)
    snapshot = archive_data.snapshot_get(visit["snapshot"])
    snapshot_sizes = archive_data.snapshot_count_branches(visit["snapshot"])
    branches, releases, _ = process_snapshot_branches(snapshot)
    release_info = random.choice(releases)
    directory_content = archive_data.directory_ls(release_info["directory"])
    directory_file = random.choice(
        [e for e in directory_content if e["type"] == "file"]
    )
    url = reverse(
        "browse-content",
        url_args={"query_string": directory_file["checksums"]["sha1"]},
        query_params={
            "origin_url": origin_url,
            "snapshot": snapshot["id"],
            "release": release_info["name"],
            "path": directory_file["name"],
        },
    )
    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )
    _check_origin_snapshot_related_html(
        resp, origin_with_multiple_visits, snapshot, snapshot_sizes, branches, releases
    )
    assert_contains(resp, directory_file["name"])
    assert_contains(resp, f"Release: <strong>{release_info['name']}</strong>")
    # in a release context the anchor qualifier is the release SWHID
    cnt_swhid = gen_swhid(
        ObjectType.CONTENT,
        directory_file["checksums"]["sha1_git"],
        metadata={
            "origin": origin_url,
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.RELEASE, release_info["id"]),
            "path": f"/{directory_file['name']}",
        },
    )
    assert_contains(resp, cnt_swhid)
    dir_swhid = gen_swhid(
        ObjectType.DIRECTORY,
        release_info["directory"],
        metadata={
            "origin": origin_url,
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.RELEASE, release_info["id"]),
        },
    )
    assert_contains(resp, dir_swhid)
    rev_swhid = gen_swhid(
        ObjectType.REVISION,
        release_info["target"],
        metadata={
            "origin": origin_url,
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
        },
    )
    assert_contains(resp, rev_swhid)
    rel_swhid = gen_swhid(
        ObjectType.RELEASE,
        release_info["id"],
        metadata={
            "origin": origin_url,
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
        },
    )
    assert_contains(resp, rel_swhid)
    snp_swhid = gen_swhid(
        ObjectType.SNAPSHOT, snapshot["id"], metadata={"origin": origin_url,},
    )
    assert_contains(resp, snp_swhid)
def _check_origin_snapshot_related_html(
    resp, origin, snapshot, snapshot_sizes, branches, releases
):
    """Assert that a content page rendered in an origin/snapshot context
    contains the origin link, the branches/releases navigation and the
    expected counts."""
    browse_origin_url = reverse(
        "browse-origin", query_params={"origin_url": origin["url"]}
    )
    assert_contains(resp, f'href="{browse_origin_url}"')
    origin_branches_url = reverse(
        "browse-origin-branches",
        query_params={"origin_url": origin["url"], "snapshot": snapshot["id"]},
    )
    assert_contains(resp, f'href="{escape(origin_branches_url)}"')
    assert_contains(resp, f"Branches ({snapshot_sizes['revision']})")
    origin_releases_url = reverse(
        "browse-origin-releases",
        query_params={"origin_url": origin["url"], "snapshot": snapshot["id"]},
    )
    assert_contains(resp, f'href="{escape(origin_releases_url)}"')
    assert_contains(resp, f"Releases ({snapshot_sizes['release']})")
    assert_contains(resp, '<li class="swh-branch">', count=len(branches))
    assert_contains(resp, '<li class="swh-release">', count=len(releases))
def _process_content_for_display(archive_data, content):
    """Mirror the browse view's processing pipeline: fetch the raw bytes,
    detect mimetype/encoding, re-encode, and prepare the content for HTML
    display. Returns the display dict produced by
    ``prepare_content_for_display``."""
    content_data = archive_data.content_get_data(content["sha1"])
    mime_type, encoding = get_mimetype_and_encoding_for_content(content_data["data"])
    mime_type, encoding, content_data = _re_encode_content(
        mime_type, encoding, content_data["data"]
    )
    content_display = prepare_content_for_display(
        content_data, mime_type, content["path"]
    )
    # the displayed content must always be a decoded string
    assert type(content_display["content_data"]) == str
    return content_display

File Metadata

Mime Type
text/x-diff
Expires
Mon, Aug 18, 11:03 PM (5 d, 18 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3268785

Event Timeline