diff --git a/assets/src/bundles/guided_tour/index.js b/assets/src/bundles/guided_tour/index.js --- a/assets/src/bundles/guided_tour/index.js +++ b/assets/src/bundles/guided_tour/index.js @@ -17,6 +17,8 @@ // we use a origin available both in production and swh-web tests // environment to ease tour testing const originUrl = 'https://github.com/memononen/libtess2'; +// sha1 for the content used +const contentSha1 = 'sha1_git:2d4e23bf1d3f64c1e8b94622178e18d89c653de0'; function openSWHIDsTabBeforeNextStep() { window.scrollTo(0, 0); @@ -64,7 +66,7 @@ } }, { - url: `${Urls.browse_origin_content()}?origin_url=${originUrl}&path=Example/example.c`, + url: `${Urls.browse_content(contentSha1)}?origin_url=${originUrl}&path=Example/example.c`, introJsOptions: { steps: guidedTourSteps.browseContent }, diff --git a/docs/uri-scheme-browse-content.rst b/docs/uri-scheme-browse-content.rst --- a/docs/uri-scheme-browse-content.rst +++ b/docs/uri-scheme-browse-content.rst @@ -41,6 +41,13 @@ :query string path: describe the path of the content relative to a root directory (used to add context aware navigation links when navigating from a directory view) + :query string origin_url: optional; specify the origin from which to retrieve the contnet. + :query string snapshot: optional; specify the snapshot from which to retrieve the contnet. + :query string timestamp: optional; an ISO 8601 datetime string to parse in order to find the closest visit + :query string branch: optional; specify the snapshot branch name from which + to retrieve the content. HEAD branch will be used by default. + :query string release: optional; specify the snapshot release name from which + to retrieve the content. :statuscode 200: no error :statuscode 400: an invalid query string has been provided :statuscode 404: requested content can not be found in the archive @@ -89,4 +96,31 @@ :swh_web_browse:`content/sha1:1cb1447c1c7ddc1b03eac88398e40bd914d46b62/raw/` :swh_web_browse:`content/sha256:8ceb4b9ee5adedde47b31e975c1d90c73ad27b6b165a1dcd80c7c545eb65b903/raw/?filename=COPYING` -.. _highlightjs: https://highlightjs.org/ \ No newline at end of file +.. http:get:: /browse/content/ + + HTML view that displays a content identified by the query parameters. + An origin URL, snapshot or revision must be provided along with a content path + as query parameters. + + :query string path: The path of the content relative to the root directory + :query string origin_url: optional; specify the origin from which to retrieve the contnet. + :query string snapshot: optional; specify the snapshot from which to retrieve the contnet. + :query string timestamp: optional; an ISO 8601 datetime string to parse in order to find the closest visit + :query string branch: optional; specify the snapshot branch name from which + to retrieve the content. HEAD branch will be used by default. + :query string release: optional; specify the snapshot release name from which + to retrieve the content. + + :statuscode 200: no error + :statuscode 404: path and/or the identifier is missing in the query parameters. + :statuscode 404: requested content can not be found in the archive, + or the provided content path does not exist from the origin root directory + + **Examples:** + + .. parsed-literal:: + + :swh_web_browse:`content/?origin_url=https://github.com/python/cpython&path=.gitignore` + :swh_web_browse:`content/?snapshot=673156c31a876c5b99b2fe3e89615529de9a3c44&path=src/opengl/qglbuffer.h` + +.. _highlightjs: https://highlightjs.org/ diff --git a/docs/uri-scheme-browse-origin.rst b/docs/uri-scheme-browse-origin.rst --- a/docs/uri-scheme-browse-origin.rst +++ b/docs/uri-scheme-browse-origin.rst @@ -223,6 +223,10 @@ """""""""""""" .. http:get:: /browse/origin/content/ + :deprecated: + + .. warning:: + That endpoint is deprecated, use :http:get:`/browse/content/` instead. HTML view that produces a display of a content associated to the latest full visit of a software origin. @@ -282,7 +286,7 @@ :deprecated: .. warning:: - That endpoint is deprecated, use :http:get:`/browse/origin/content/` instead. + That endpoint is deprecated, use :http:get:`/browse/content/` instead. HTML view that produces a display of a content associated to the latest full visit of a software origin. diff --git a/docs/uri-scheme-browse-snapshot.rst b/docs/uri-scheme-browse-snapshot.rst --- a/docs/uri-scheme-browse-snapshot.rst +++ b/docs/uri-scheme-browse-snapshot.rst @@ -107,6 +107,10 @@ """""""""""""""" .. http:get:: /browse/snapshot/(snapshot_id)/content/ + :deprecated: + + .. warning:: + That endpoint is deprecated, use :http:get:`/browse/content/` instead. HTML view that produces a display of a content reachable from a snapshot. @@ -142,7 +146,7 @@ :deprecated: .. warning:: - That endpoint is deprecated, use :http:get:`/browse/snapshot/(snapshot_id)/content/` instead. + That endpoint is deprecated, use :http:get:`/browse/content/` instead. HTML view that produces a display of a content reachable from a snapshot. diff --git a/swh/web/browse/snapshot_context.py b/swh/web/browse/snapshot_context.py --- a/swh/web/browse/snapshot_context.py +++ b/swh/web/browse/snapshot_context.py @@ -16,9 +16,7 @@ from swh.model.model import Snapshot from swh.model.swhids import CoreSWHID, ObjectType from swh.web.browse.utils import ( - content_display_max_size, format_log_entries, - gen_content_link, gen_release_link, gen_revision_link, gen_revision_log_link, @@ -26,15 +24,12 @@ gen_snapshot_link, get_directory_entries, get_readme_to_display, - prepare_content_for_display, - request_content, ) -from swh.web.common import archive, highlightjs +from swh.web.common import archive from swh.web.common.exc import BadInputExc, NotFoundExc, http_status_code_message from swh.web.common.identifiers import get_swhids_info from swh.web.common.origin_visits import get_origin_visit from swh.web.common.typing import ( - ContentMetadata, DirectoryMetadata, OriginInfo, SnapshotBranchInfo, @@ -930,203 +925,6 @@ ) -def browse_snapshot_content( - request, - snapshot_id=None, - origin_url=None, - timestamp=None, - path=None, - selected_language=None, -): - """ - Django view implementation for browsing a content in a snapshot context. - """ - _check_origin_url(snapshot_id, origin_url) - - if path is None: - raise BadInputExc("The path of a content must be given as query parameter.") - - snapshot_context = get_snapshot_context( - snapshot_id=snapshot_id, - origin_url=origin_url, - timestamp=timestamp, - visit_id=request.GET.get("visit_id"), - path=path, - browse_context="content", - branch_name=request.GET.get("branch"), - release_name=request.GET.get("release"), - revision_id=request.GET.get("revision"), - ) - - root_directory = snapshot_context["root_directory"] - sha1_git = None - query_string = None - content_data = {} - directory_id = None - split_path = path.split("/") - filename = split_path[-1] - filepath = path[: -len(filename)] - error_info = { - "status_code": 200, - "description": None, - } - if root_directory: - try: - content_info = archive.lookup_directory_with_path(root_directory, path) - sha1_git = content_info["target"] - query_string = "sha1_git:" + sha1_git - content_data = request_content(query_string) - - if filepath: - dir_info = archive.lookup_directory_with_path(root_directory, filepath) - directory_id = dir_info["target"] - else: - directory_id = root_directory - except NotFoundExc as e: - error_info["status_code"] = 404 - error_info["description"] = f"NotFoundExc: {str(e)}" - - revision_id = snapshot_context["revision_id"] - origin_info = snapshot_context["origin_info"] - visit_info = snapshot_context["visit_info"] - snapshot_id = snapshot_context["snapshot_id"] - - if content_data.get("raw_data") is not None: - content_display_data = prepare_content_for_display( - content_data["raw_data"], content_data["mimetype"], path - ) - content_data.update(content_display_data) - - # Override language with user-selected language - if selected_language is not None: - content_data["language"] = selected_language - - available_languages = None - - if content_data.get("mimetype") is not None and "text/" in content_data["mimetype"]: - available_languages = highlightjs.get_supported_languages() - - breadcrumbs = _build_breadcrumbs(snapshot_context, filepath) - - breadcrumbs.append({"name": filename, "url": None}) - - browse_content_link = gen_content_link(sha1_git) - - content_raw_url = None - if query_string: - content_raw_url = reverse( - "browse-content-raw", - url_args={"query_string": query_string}, - query_params={"filename": filename}, - ) - - content_checksums = content_data.get("checksums", {}) - - sha1_git = content_checksums.get("sha1_git") - - swh_objects = [] - - if sha1_git is not None: - swh_objects.append( - SWHObjectInfo(object_type=ObjectType.CONTENT, object_id=sha1_git) - ) - if directory_id is not None: - swh_objects.append( - SWHObjectInfo(object_type=ObjectType.DIRECTORY, object_id=directory_id) - ) - if revision_id is not None: - swh_objects.append( - SWHObjectInfo(object_type=ObjectType.REVISION, object_id=revision_id) - ) - swh_objects.append( - SWHObjectInfo(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id) - ) - - visit_date = None - visit_type = None - if visit_info: - visit_date = format_utc_iso_date(visit_info["date"]) - visit_type = visit_info["type"] - - release_id = snapshot_context["release_id"] - if release_id: - swh_objects.append( - SWHObjectInfo(object_type=ObjectType.RELEASE, object_id=release_id) - ) - - content_metadata = ContentMetadata( - object_type=ObjectType.CONTENT, - object_id=content_checksums.get("sha1_git"), - sha1=content_checksums.get("sha1"), - sha1_git=content_checksums.get("sha1_git"), - sha256=content_checksums.get("sha256"), - blake2s256=content_checksums.get("blake2s256"), - content_url=browse_content_link, - mimetype=content_data.get("mimetype"), - encoding=content_data.get("encoding"), - size=content_data.get("length", 0), - language=content_data.get("language"), - root_directory=root_directory, - path=f"/{filepath}", - filename=filename, - directory=directory_id, - revision=revision_id, - release=release_id, - snapshot=snapshot_id, - origin_url=origin_url, - visit_date=visit_date, - visit_type=visit_type, - ) - - swhids_info = get_swhids_info(swh_objects, snapshot_context, content_metadata) - - content_path = "/".join([bc["name"] for bc in breadcrumbs]) - context_found = "snapshot: %s" % snapshot_context["snapshot_id"] - if origin_info: - context_found = "origin: %s" % origin_info["url"] - heading = "Content - %s - %s - %s" % ( - content_path, - snapshot_context["branch"], - context_found, - ) - - top_right_link = None - if not snapshot_context["is_empty"]: - top_right_link = { - "url": content_raw_url, - "icon": swh_object_icons["content"], - "text": "Raw File", - } - - return render( - request, - "browse/content.html", - { - "heading": heading, - "swh_object_name": "Content", - "swh_object_metadata": content_metadata, - "content": content_data.get("content_data"), - "content_size": content_data.get("length"), - "max_content_size": content_display_max_size, - "filename": filename, - "encoding": content_data.get("encoding"), - "mimetype": content_data.get("mimetype"), - "language": content_data.get("language"), - "available_languages": available_languages, - "breadcrumbs": breadcrumbs if root_directory else [], - "top_right_link": top_right_link, - "snapshot_context": snapshot_context, - "vault_cooking": None, - "show_actions": True, - "swhids_info": swhids_info, - "error_code": error_info["status_code"], - "error_message": http_status_code_message.get(error_info["status_code"]), - "error_description": error_info["description"], - }, - status=error_info["status_code"], - ) - - PER_PAGE = 100 diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -9,7 +9,7 @@ import sentry_sdk from django.http import HttpResponse, JsonResponse -from django.shortcuts import render +from django.shortcuts import redirect, render from swh.model.hashutil import hash_to_hex from swh.model.swhids import ObjectType @@ -22,7 +22,7 @@ request_content, ) from swh.web.common import archive, highlightjs, query -from swh.web.common.exc import NotFoundExc, http_status_code_message +from swh.web.common.exc import BadInputExc, NotFoundExc, http_status_code_message from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import ContentMetadata, SWHObjectInfo from swh.web.common.utils import gen_path_info, reverse, swh_object_icons @@ -170,18 +170,55 @@ return JsonResponse(diff_data) +def _get_content_from_request(request): + path = request.GET.get("path") + if path is None: + raise BadInputExc("The path query parameter must be provided.") + snapshot = request.GET.get("snapshot") + origin_url = request.GET.get("origin_url") + if snapshot is None and origin_url is None: + raise BadInputExc( + "The origin_url or snapshot query parameters must be provided." + ) + snapshot_context = get_snapshot_context( + snapshot_id=snapshot, + origin_url=origin_url, + path=path, + timestamp=request.GET.get("timestamp"), + visit_id=request.GET.get("visit_id"), + branch_name=request.GET.get("branch"), + release_name=request.GET.get("release"), + browse_context="content", + ) + root_directory = snapshot_context["root_directory"] + return archive.lookup_directory_with_path(root_directory, path) + + @browse_route( r"content/(?P[0-9a-z_:]*[0-9a-f]+.)/", + r"content/", view_name="browse-content", checksum_args=["query_string"], ) -def content_display(request, query_string): +def content_display(request, query_string=None): """Django view that produces an HTML display of a content identified by its hash value. - The url that points to it is + The URLs that points to it are :http:get:`/browse/content/[(algo_hash):](hash)/` + :http:get:`/browse/content/` """ + if query_string is None: + # this case happens when redirected from origin/content or snapshot/content + content = _get_content_from_request(request) + return redirect( + reverse( + "browse-content", + url_args={"query_string": f"sha1_git:{content['target']}"}, + query_params=request.GET, + ), + ) + algo, checksum = query.parse_hash(query_string) checksum = hash_to_hex(checksum) origin_url = request.GET.get("origin_url") @@ -204,6 +241,8 @@ snapshot_context = get_snapshot_context( origin_url=origin_url, snapshot_id=snapshot_id, + timestamp=request.GET.get("timestamp"), + visit_id=request.GET.get("visit_id"), branch_name=request.GET.get("branch"), release_name=request.GET.get("release"), revision_id=request.GET.get("revision"), diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -4,27 +4,21 @@ # See top-level LICENSE file for more information from django.shortcuts import redirect, render -from django.urls import resolve from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import ( - browse_snapshot_content, browse_snapshot_directory, get_snapshot_context, ) from swh.web.common import archive from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits -from swh.web.common.utils import format_utc_iso_date, parse_iso8601_date_to_utc, reverse - - -def redirect_to_new_route(request, new_route): - request_path = resolve(request.path_info) - # Send all the url_args and the request_args as query params - # eg /origin//log?path=test - # will be send as /log?url=&path=test - args = {**request_path.kwargs, **request.GET.dict()} - return redirect(reverse(new_route, query_params=args), permanent=True,) +from swh.web.common.utils import ( + format_utc_iso_date, + parse_iso8601_date_to_utc, + redirect_to_new_route, + reverse, +) @browse_route( @@ -73,20 +67,16 @@ r"origin/content/", view_name="browse-origin-content", ) def origin_content_browse(request): - """Django view that produces an HTML display of a content + """ + This route is deprecated; use http:get:`/browse/content` instead + + Django view that produces an HTML display of a content associated to an origin for a given visit. The URL that points to it is :http:get:`/browse/origin/content/` """ - return browse_snapshot_content( - request, - origin_url=request.GET.get("origin_url"), - snapshot_id=request.GET.get("snapshot"), - timestamp=request.GET.get("timestamp"), - path=request.GET.get("path"), - selected_language=request.GET.get("language"), - ) + return redirect_to_new_route(request, "browse-content") @browse_route( @@ -96,7 +86,10 @@ view_name="browse-origin-content-legacy", ) def origin_content_browse_legacy(request, origin_url, path=None, timestamp=None): - """Django view that produces an HTML display of a content + """ + This route is deprecated; use http:get:`/browse/content` instead + + Django view that produces an HTML display of a content associated to an origin for a given visit. The URLs that point to it are @@ -104,14 +97,7 @@ :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/content/(path)/` """ - return browse_snapshot_content( - request, - origin_url=origin_url, - snapshot_id=request.GET.get("snapshot"), - timestamp=timestamp, - path=path, - selected_language=request.GET.get("language"), - ) + return redirect_to_new_route(request, "browse-content") @browse_route( diff --git a/swh/web/browse/views/snapshot.py b/swh/web/browse/views/snapshot.py --- a/swh/web/browse/views/snapshot.py +++ b/swh/web/browse/views/snapshot.py @@ -9,14 +9,13 @@ from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import ( browse_snapshot_branches, - browse_snapshot_content, browse_snapshot_directory, browse_snapshot_log, browse_snapshot_releases, get_snapshot_context, ) from swh.web.common.exc import BadInputExc -from swh.web.common.utils import reverse +from swh.web.common.utils import redirect_to_new_route, reverse def get_snapshot_from_request(request): @@ -93,17 +92,16 @@ checksum_args=["snapshot_id"], ) def snapshot_content_browse(request, snapshot_id): - """Django view that produces an HTML display of a content + """ + This route is deprecated; use http:get:`/browse/content` instead + + Django view that produces an HTML display of a content collected in a snapshot. The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/content/` """ - return browse_snapshot_content( - request, - snapshot_id=snapshot_id, - path=request.GET.get("path"), - selected_language=request.GET.get("language"), - ) + + return redirect_to_new_route(request, "browse-content") @browse_route( @@ -112,18 +110,16 @@ checksum_args=["snapshot_id"], ) def snapshot_content_browse_legacy(request, snapshot_id, path): - """Django view that produces an HTML display of a content + """ + This route is deprecated; use http:get:`/browse/content` instead + + Django view that produces an HTML display of a content collected in a snapshot. The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/content/(path)/` """ - return browse_snapshot_content( - request, - snapshot_id=snapshot_id, - path=path, - selected_language=request.GET.get("language"), - ) + return redirect_to_new_route(request, "browse-content") @browse_route( diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -22,6 +22,8 @@ from django.core.cache import cache from django.http import HttpRequest, QueryDict +from django.shortcuts import redirect +from django.urls import resolve from django.urls import reverse as django_reverse from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION @@ -415,3 +417,13 @@ return sorted(search().visit_types_count().keys()) except Exception: return [] + + +def redirect_to_new_route(request, new_route, permanent=True): + """Redirect a request to another route with url args and query parameters + eg: /origin//log?path=test can be redirected as + /log?url=&path=test. This can be used to deprecate routes + """ + request_path = resolve(request.path_info) + args = {**request_path.kwargs, **request.GET.dict()} + return redirect(reverse(new_route, query_params=args), permanent=permanent,) diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py --- a/swh/web/tests/browse/views/test_content.py +++ b/swh/web/tests/browse/views/test_content.py @@ -4,6 +4,7 @@ # See top-level LICENSE file for more information import random +import re import pytest @@ -18,7 +19,13 @@ ) from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import gen_swhid -from swh.web.common.utils import gen_path_info, reverse +from swh.web.common.utils import ( + format_utc_iso_date, + gen_path_info, + parse_iso8601_date_to_utc, + reverse, +) +from swh.web.tests.data import get_content from swh.web.tests.django_asserts import assert_contains, assert_not_contains from swh.web.tests.utils import check_html_get_response, check_http_get_response @@ -631,3 +638,384 @@ assert type(content_display["content_data"]) == str return content_display + + +def test_content_dispaly_empty_query_string_missing_path(client): + url = reverse("browse-content", query_params={"origin_url": "http://example.com"},) + resp = check_html_get_response( + client, url, status_code=400, template_used="error.html" + ) + assert_contains(resp, "The path query parameter must be provided.", status_code=400) + + +def test_content_dispaly_empty_query_string_and_snapshot_origin(client): + url = reverse("browse-content", query_params={"path": "test.txt"},) + resp = check_html_get_response(client, url, status_code=400,) + assert_contains( + resp, + "The origin_url or snapshot query parameters must be provided.", + status_code=400, + ) + + +def test_content_dispaly_empty_query_string_with_origin( + client, archive_data, origin_with_multiple_visits +): + origin_url = origin_with_multiple_visits["url"] + snapshot = archive_data.snapshot_get_latest(origin_url) + head_rev_id = archive_data.snapshot_get_head(snapshot) + head_rev = archive_data.revision_get(head_rev_id) + dir_content = archive_data.directory_ls(head_rev["directory"]) + dir_files = [e for e in dir_content if e["type"] == "file"] + dir_file = random.choice(dir_files) + + url = reverse( + "browse-content", + query_params={"origin_url": origin_url, "path": dir_file["name"],}, + ) + + resp = check_html_get_response(client, url, status_code=302,) + redict_url = reverse( + "browse-content", + url_args={"query_string": f"sha1_git:{dir_file['checksums']['sha1_git']}"}, + query_params={"origin_url": origin_url, "path": dir_file["name"],}, + ) + assert resp.url == redict_url + + +def test_content_dispaly_empty_query_string_with_snapshot( + client, archive_data, origin_with_multiple_visits +): + origin_url = origin_with_multiple_visits["url"] + snapshot = archive_data.snapshot_get_latest(origin_url) + head_rev_id = archive_data.snapshot_get_head(snapshot) + head_rev = archive_data.revision_get(head_rev_id) + dir_content = archive_data.directory_ls(head_rev["directory"]) + dir_files = [e for e in dir_content if e["type"] == "file"] + dir_file = random.choice(dir_files) + url = reverse( + "browse-content", + query_params={"snapshot": snapshot["id"], "path": dir_file["name"],}, + ) + + resp = check_html_get_response(client, url, status_code=302,) + redict_url = reverse( + "browse-content", + url_args={"query_string": f"sha1_git:{dir_file['checksums']['sha1_git']}"}, + query_params={"snapshot": snapshot["id"], "path": dir_file["name"],}, + ) + assert resp.url == redict_url + + +def test_browse_origin_content_no_visit(client, mocker, origin): + mock_get_origin_visits = mocker.patch( + "swh.web.common.origin_visits.get_origin_visits" + ) + mock_get_origin_visits.return_value = [] + mock_archive = mocker.patch("swh.web.common.origin_visits.archive") + mock_archive.lookup_origin_visit_latest.return_value = None + url = reverse( + "browse-content", query_params={"origin_url": origin["url"], "path": "foo"}, + ) + + resp = check_html_get_response( + client, url, status_code=404, template_used="error.html" + ) + assert_contains(resp, "No valid visit", status_code=404) + assert not mock_get_origin_visits.called + + +def test_browse_origin_content_unknown_visit(client, mocker, origin): + mock_get_origin_visits = mocker.patch( + "swh.web.common.origin_visits.get_origin_visits" + ) + mock_get_origin_visits.return_value = [{"visit": 1}] + + url = reverse( + "browse-content", + query_params={"origin_url": origin["url"], "path": "foo", "visit_id": 2}, + ) + + resp = check_html_get_response( + client, url, status_code=404, template_used="error.html" + ) + assert re.search("Resource not found", resp.content.decode("utf-8")) + + +def test_browse_origin_content_not_found(client, origin): + url = reverse( + "browse-content", + query_params={"origin_url": origin["url"], "path": "/invalid/file/path"}, + ) + + resp = check_html_get_response( + client, url, status_code=404, template_used="error.html" + ) + assert re.search("Resource not found", resp.content.decode("utf-8")) + + +def test_browse_content_invalid_origin(client): + url = reverse( + "browse-content", + query_params={ + "origin_url": "http://invalid-origin", + "path": "/invalid/file/path", + }, + ) + + resp = check_html_get_response( + client, url, status_code=404, template_used="error.html" + ) + assert re.search("Resource not found", resp.content.decode("utf-8")) + + +def test_origin_content_view( + client, archive_data, swh_scheduler, origin_with_multiple_visits +): + origin_visits = archive_data.origin_visit_get(origin_with_multiple_visits["url"]) + + def _get_archive_data(visit_idx): + snapshot = archive_data.snapshot_get(origin_visits[visit_idx]["snapshot"]) + head_rev_id = archive_data.snapshot_get_head(snapshot) + head_rev = archive_data.revision_get(head_rev_id) + dir_content = archive_data.directory_ls(head_rev["directory"]) + dir_files = [e for e in dir_content if e["type"] == "file"] + dir_file = random.choice(dir_files) + branches, releases, _ = process_snapshot_branches(snapshot) + return { + "branches": branches, + "releases": releases, + "root_dir_sha1": head_rev["directory"], + "content": get_content(dir_file["checksums"]["sha1"]), + "visit": origin_visits[visit_idx], + "snapshot_sizes": archive_data.snapshot_count_branches(snapshot["id"]), + } + + tdata = _get_archive_data(-1) + + _origin_content_view_test_helper( + client, + archive_data, + origin_with_multiple_visits, + origin_visits[-1], + tdata["snapshot_sizes"], + tdata["branches"], + tdata["releases"], + tdata["root_dir_sha1"], + tdata["content"], + ) + + _origin_content_view_test_helper( + client, + archive_data, + origin_with_multiple_visits, + origin_visits[-1], + tdata["snapshot_sizes"], + tdata["branches"], + tdata["releases"], + tdata["root_dir_sha1"], + tdata["content"], + timestamp=tdata["visit"]["date"], + ) + + _origin_content_view_test_helper( + client, + archive_data, + origin_with_multiple_visits, + origin_visits[-1], + tdata["snapshot_sizes"], + tdata["branches"], + tdata["releases"], + tdata["root_dir_sha1"], + tdata["content"], + snapshot_id=tdata["visit"]["snapshot"], + ) + + tdata = _get_archive_data(0) + + _origin_content_view_test_helper( + client, + archive_data, + origin_with_multiple_visits, + origin_visits[0], + tdata["snapshot_sizes"], + tdata["branches"], + tdata["releases"], + tdata["root_dir_sha1"], + tdata["content"], + visit_id=tdata["visit"]["visit"], + ) + + _origin_content_view_test_helper( + client, + archive_data, + origin_with_multiple_visits, + origin_visits[0], + tdata["snapshot_sizes"], + tdata["branches"], + tdata["releases"], + tdata["root_dir_sha1"], + tdata["content"], + snapshot_id=tdata["visit"]["snapshot"], + ) + + +def _origin_content_view_test_helper( + client, + archive_data, + origin_info, + origin_visit, + snapshot_sizes, + origin_branches, + origin_releases, + root_dir_sha1, + content, + visit_id=None, + timestamp=None, + snapshot_id=None, +): + content_path = "/".join(content["path"].split("/")[1:]) + + if not visit_id and not snapshot_id: + visit_id = origin_visit["visit"] + + query_params = {"origin_url": origin_info["url"], "path": content_path} + + if timestamp: + query_params["timestamp"] = timestamp + + if visit_id: + query_params["visit_id"] = visit_id + elif snapshot_id: + query_params["snapshot"] = snapshot_id + + url = reverse( + "browse-content", + url_args={"query_string": f"sha1_git:{content['sha1_git']}"}, + query_params=query_params, + ) + + resp = check_html_get_response( + client, url, status_code=200, template_used="browse/content.html" + ) + + assert type(content["data"]) == str + + assert_contains(resp, '' % content["hljs_language"]) + assert_contains(resp, escape(content["data"])) + + split_path = content_path.split("/") + + filename = split_path[-1] + path = content_path.replace(filename, "")[:-1] + + path_info = gen_path_info(path) + + del query_params["path"] + + if timestamp: + query_params["timestamp"] = format_utc_iso_date( + parse_iso8601_date_to_utc(timestamp).isoformat(), "%Y-%m-%dT%H:%M:%SZ" + ) + + root_dir_url = reverse( + "browse-directory", + url_args={"sha1_git": root_dir_sha1}, + query_params=query_params, + ) + + assert_contains(resp, '
  • ', count=len(path_info) + 1) + + assert_contains(resp, '%s' % (root_dir_url, root_dir_sha1[:7])) + + for p in path_info: + query_params["path"] = p["path"] + dir_url = reverse("browse-origin-directory", query_params=query_params) + assert_contains(resp, '%s' % (dir_url, p["name"])) + + assert_contains(resp, "
  • %s
  • " % filename) + + query_string = "sha1_git:" + content["sha1_git"] + + url_raw = reverse( + "browse-content-raw", + url_args={"query_string": query_string}, + query_params={"filename": filename}, + ) + assert_contains(resp, url_raw) + + if "path" in query_params: + del query_params["path"] + + origin_branches_url = reverse("browse-origin-branches", query_params=query_params) + + assert_contains(resp, f'href="{escape(origin_branches_url)}"') + assert_contains(resp, f"Branches ({snapshot_sizes['revision']})") + + origin_releases_url = reverse("browse-origin-releases", query_params=query_params) + + assert_contains(resp, f'href="{escape(origin_releases_url)}">') + assert_contains(resp, f"Releases ({snapshot_sizes['release']})") + + assert_contains(resp, '
  • ', count=len(origin_branches)) + + query_params["path"] = content_path + + for branch in origin_branches: + root_dir_branch_url = reverse( + "browse-origin-content", + query_params={"branch": branch["name"], **query_params}, + ) + + assert_contains(resp, '' % root_dir_branch_url) + + assert_contains(resp, '
  • ', count=len(origin_releases)) + + query_params["branch"] = None + for release in origin_releases: + root_dir_release_url = reverse( + "browse-origin-content", + query_params={"release": release["name"], **query_params}, + ) + + assert_contains(resp, '' % root_dir_release_url) + + url = reverse( + "browse-content", + url_args={"query_string": query_string}, + query_params=query_params, + ) + + resp = check_html_get_response( + client, url, status_code=200, template_used="browse/content.html" + ) + + snapshot = archive_data.snapshot_get(origin_visit["snapshot"]) + head_rev_id = archive_data.snapshot_get_head(snapshot) + + swhid_context = { + "origin": origin_info["url"], + "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]), + "anchor": gen_swhid(ObjectType.REVISION, head_rev_id), + "path": f"/{content_path}", + } + + swh_cnt_id = gen_swhid( + ObjectType.CONTENT, content["sha1_git"], metadata=swhid_context + ) + swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id}) + assert_contains(resp, swh_cnt_id) + assert_contains(resp, swh_cnt_id_url) + + assert_contains(resp, "swh-take-new-snapshot") + + _check_origin_link(resp, origin_info["url"]) + + assert_not_contains(resp, "swh-metadata-popover") + + +def _check_origin_link(resp, origin_url): + browse_origin_url = reverse( + "browse-origin", query_params={"origin_url": origin_url} + ) + assert_contains(resp, f'href="{browse_origin_url}"') diff --git a/swh/web/tests/browse/views/test_origin.py b/swh/web/tests/browse/views/test_origin.py --- a/swh/web/tests/browse/views/test_origin.py +++ b/swh/web/tests/browse/views/test_origin.py @@ -24,13 +24,7 @@ from swh.web.browse.snapshot_context import process_snapshot_branches from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import gen_swhid -from swh.web.common.utils import ( - format_utc_iso_date, - gen_path_info, - parse_iso8601_date_to_utc, - reverse, -) -from swh.web.tests.data import get_content +from swh.web.common.utils import format_utc_iso_date, parse_iso8601_date_to_utc, reverse from swh.web.tests.django_asserts import assert_contains, assert_not_contains from swh.web.tests.strategies import new_origin, new_snapshot, visit_dates from swh.web.tests.utils import check_html_get_response @@ -57,97 +51,6 @@ _check_origin_link(resp, origin_url) -def test_origin_content_view( - client, archive_data, swh_scheduler, origin_with_multiple_visits -): - origin_visits = archive_data.origin_visit_get(origin_with_multiple_visits["url"]) - - def _get_archive_data(visit_idx): - snapshot = archive_data.snapshot_get(origin_visits[visit_idx]["snapshot"]) - head_rev_id = archive_data.snapshot_get_head(snapshot) - head_rev = archive_data.revision_get(head_rev_id) - dir_content = archive_data.directory_ls(head_rev["directory"]) - dir_files = [e for e in dir_content if e["type"] == "file"] - dir_file = random.choice(dir_files) - branches, releases, _ = process_snapshot_branches(snapshot) - return { - "branches": branches, - "releases": releases, - "root_dir_sha1": head_rev["directory"], - "content": get_content(dir_file["checksums"]["sha1"]), - "visit": origin_visits[visit_idx], - "snapshot_sizes": archive_data.snapshot_count_branches(snapshot["id"]), - } - - tdata = _get_archive_data(-1) - - _origin_content_view_test_helper( - client, - archive_data, - origin_with_multiple_visits, - origin_visits[-1], - tdata["snapshot_sizes"], - tdata["branches"], - tdata["releases"], - tdata["root_dir_sha1"], - tdata["content"], - ) - - _origin_content_view_test_helper( - client, - archive_data, - origin_with_multiple_visits, - origin_visits[-1], - tdata["snapshot_sizes"], - tdata["branches"], - tdata["releases"], - tdata["root_dir_sha1"], - tdata["content"], - timestamp=tdata["visit"]["date"], - ) - - _origin_content_view_test_helper( - client, - archive_data, - origin_with_multiple_visits, - origin_visits[-1], - tdata["snapshot_sizes"], - tdata["branches"], - tdata["releases"], - tdata["root_dir_sha1"], - tdata["content"], - snapshot_id=tdata["visit"]["snapshot"], - ) - - tdata = _get_archive_data(0) - - _origin_content_view_test_helper( - client, - archive_data, - origin_with_multiple_visits, - origin_visits[0], - tdata["snapshot_sizes"], - tdata["branches"], - tdata["releases"], - tdata["root_dir_sha1"], - tdata["content"], - visit_id=tdata["visit"]["visit"], - ) - - _origin_content_view_test_helper( - client, - archive_data, - origin_with_multiple_visits, - origin_visits[0], - tdata["snapshot_sizes"], - tdata["branches"], - tdata["releases"], - tdata["root_dir_sha1"], - tdata["content"], - snapshot_id=tdata["visit"]["snapshot"], - ) - - def test_origin_root_directory_view(client, archive_data, swh_scheduler, origin): origin_visits = archive_data.origin_visit_get(origin["url"]) @@ -529,43 +432,6 @@ assert re.search("Directory.*not found", resp.content.decode("utf-8")) -def test_browse_origin_content_no_visit(client, mocker, origin): - mock_get_origin_visits = mocker.patch( - "swh.web.common.origin_visits.get_origin_visits" - ) - mock_get_origin_visits.return_value = [] - mock_archive = mocker.patch("swh.web.common.origin_visits.archive") - mock_archive.lookup_origin_visit_latest.return_value = None - url = reverse( - "browse-origin-content", - query_params={"origin_url": origin["url"], "path": "foo"}, - ) - - resp = check_html_get_response( - client, url, status_code=404, template_used="error.html" - ) - assert_contains(resp, "No valid visit", status_code=404) - assert not mock_get_origin_visits.called - - -def test_browse_origin_content_unknown_visit(client, mocker, origin): - mock_get_origin_visits = mocker.patch( - "swh.web.common.origin_visits.get_origin_visits" - ) - mock_get_origin_visits.return_value = [{"visit": 1}] - - url = reverse( - "browse-origin-content", - query_params={"origin_url": origin["url"], "path": "foo", "visit_id": 2}, - ) - - resp = check_html_get_response( - client, url, status_code=404, template_used="error.html" - ) - assert re.search("Visit.*not found", resp.content.decode("utf-8")) - assert mock_get_origin_visits.called - - def _add_empty_snapshot_origin(new_origin, archive_data): snapshot = Snapshot(branches={}) archive_data.origin_add([new_origin]) @@ -584,7 +450,7 @@ @pytest.mark.django_db -@pytest.mark.parametrize("object_type", ["content", "directory"]) +@pytest.mark.parametrize("object_type", ["directory"]) @given(new_origin()) def test_browse_origin_content_directory_empty_snapshot( client, staff_user, archive_data, object_type, new_origin @@ -606,18 +472,6 @@ assert re.search("snapshot.*is empty", resp.content.decode("utf-8")) -def test_browse_origin_content_not_found(client, origin): - url = reverse( - "browse-origin-content", - query_params={"origin_url": origin["url"], "path": "/invalid/file/path"}, - ) - - resp = check_html_get_response( - client, url, status_code=404, template_used="browse/content.html" - ) - assert re.search("Directory entry.*not found", resp.content.decode("utf-8")) - - def test_browse_directory_snapshot_not_found(client, mocker, origin): mock_get_snapshot_context = mocker.patch( "swh.web.browse.snapshot_context.get_snapshot_context" @@ -786,20 +640,8 @@ assert_not_contains(resp, "swh:1:rev:") -def test_origin_content_no_path(client, origin): - url = reverse("browse-origin-content", query_params={"origin_url": origin["url"]}) - - resp = check_html_get_response( - client, url, status_code=400, template_used="error.html" - ) - assert_contains( - resp, "The path of a content must be given as query parameter.", status_code=400 - ) - - def test_origin_views_no_url_query_parameter(client): for browse_context in ( - "content", "directory", "visits", ): @@ -824,6 +666,18 @@ ) +@given(new_origin()) +@pytest.mark.parametrize("browse_context", ["content"]) +def test_origin_content_view_redirects(client, browse_context, new_origin): + query_params = {"origin_url": new_origin.url, "path": "test.txt"} + url = reverse(f"browse-origin-{browse_context}", query_params=query_params) + + resp = check_html_get_response(client, url, status_code=301) + assert resp["location"] == reverse( + f"browse-{browse_context}", query_params=query_params + ) + + @given(new_origin()) @pytest.mark.parametrize("browse_context", ["log", "branches", "releases"]) def test_origin_view_legacy_redirects(client, browse_context, new_origin): @@ -846,147 +700,28 @@ ) -def _origin_content_view_test_helper( - client, - archive_data, - origin_info, - origin_visit, - snapshot_sizes, - origin_branches, - origin_releases, - root_dir_sha1, - content, - visit_id=None, - timestamp=None, - snapshot_id=None, -): - content_path = "/".join(content["path"].split("/")[1:]) - - if not visit_id and not snapshot_id: - visit_id = origin_visit["visit"] - - query_params = {"origin_url": origin_info["url"], "path": content_path} - - if timestamp: - query_params["timestamp"] = timestamp - - if visit_id: - query_params["visit_id"] = visit_id - elif snapshot_id: - query_params["snapshot"] = snapshot_id - - url = reverse("browse-origin-content", query_params=query_params) - - resp = check_html_get_response( - client, url, status_code=200, template_used="browse/content.html" - ) - - assert type(content["data"]) == str - - assert_contains(resp, '' % content["hljs_language"]) - assert_contains(resp, escape(content["data"])) - - split_path = content_path.split("/") - - filename = split_path[-1] - path = content_path.replace(filename, "")[:-1] - - path_info = gen_path_info(path) - - del query_params["path"] - - if timestamp: - query_params["timestamp"] = format_utc_iso_date( - parse_iso8601_date_to_utc(timestamp).isoformat(), "%Y-%m-%dT%H:%M:%SZ" - ) - - root_dir_url = reverse("browse-origin-directory", query_params=query_params) - - assert_contains(resp, '
  • ', count=len(path_info) + 1) - - assert_contains(resp, '%s' % (root_dir_url, root_dir_sha1[:7])) - - for p in path_info: - query_params["path"] = p["path"] - dir_url = reverse("browse-origin-directory", query_params=query_params) - assert_contains(resp, '%s' % (dir_url, p["name"])) - - assert_contains(resp, "
  • %s
  • " % filename) - - query_string = "sha1_git:" + content["sha1_git"] - - url_raw = reverse( - "browse-content-raw", - url_args={"query_string": query_string}, - query_params={"filename": filename}, - ) - assert_contains(resp, url_raw) - - if "path" in query_params: - del query_params["path"] - - origin_branches_url = reverse("browse-origin-branches", query_params=query_params) - - assert_contains(resp, f'href="{escape(origin_branches_url)}"') - assert_contains(resp, f"Branches ({snapshot_sizes['revision']})") - - origin_releases_url = reverse("browse-origin-releases", query_params=query_params) - - assert_contains(resp, f'href="{escape(origin_releases_url)}">') - assert_contains(resp, f"Releases ({snapshot_sizes['release']})") - - assert_contains(resp, '
  • ', count=len(origin_branches)) - - query_params["path"] = content_path - - for branch in origin_branches: - root_dir_branch_url = reverse( - "browse-origin-content", - query_params={"branch": branch["name"], **query_params}, +@given(new_origin()) +def test_origin_content_view_legacy_redirects(client, new_origin): + url_args = [ + {"origin_url": new_origin.url}, + { + "origin_url": new_origin.url, + "path": "test.txt", + "timestamp": "2021-01-23T22:24:10Z", + }, + {"origin_url": new_origin.url, "path": "test.txt"}, + ] + params = {"extra-param1": "extra-param1", "extra-param2": "extra-param2"} + for each_arg in url_args: + url = reverse( + "browse-origin-content-legacy", url_args=each_arg, query_params=params, ) - assert_contains(resp, '' % root_dir_branch_url) - - assert_contains(resp, '
  • ', count=len(origin_releases)) - - query_params["branch"] = None - for release in origin_releases: - root_dir_release_url = reverse( - "browse-origin-content", - query_params={"release": release["name"], **query_params}, + resp = check_html_get_response(client, url, status_code=301) + assert resp["location"] == reverse( + "browse-content", query_params={**each_arg, **params} ) - assert_contains(resp, '' % root_dir_release_url) - - url = reverse("browse-origin-content", query_params=query_params) - - resp = check_html_get_response( - client, url, status_code=200, template_used="browse/content.html" - ) - - snapshot = archive_data.snapshot_get(origin_visit["snapshot"]) - head_rev_id = archive_data.snapshot_get_head(snapshot) - - swhid_context = { - "origin": origin_info["url"], - "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]), - "anchor": gen_swhid(ObjectType.REVISION, head_rev_id), - "path": f"/{content_path}", - } - - swh_cnt_id = gen_swhid( - ObjectType.CONTENT, content["sha1_git"], metadata=swhid_context - ) - swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id}) - assert_contains(resp, swh_cnt_id) - assert_contains(resp, swh_cnt_id_url) - - assert_contains(resp, "swh-take-new-snapshot") - - _check_origin_link(resp, origin_info["url"]) - - assert_not_contains(resp, "swh-metadata-popover") - def _origin_directory_view_test_helper( client, diff --git a/swh/web/tests/browse/views/test_snapshot.py b/swh/web/tests/browse/views/test_snapshot.py --- a/swh/web/tests/browse/views/test_snapshot.py +++ b/swh/web/tests/browse/views/test_snapshot.py @@ -310,3 +310,22 @@ assert_contains(resp, '' % escape(browse_revision_url)) _check_origin_link(resp, origin_info["url"]) + + +def test_snapshot_content_redirect(client, snapshot): + qry = {"extra-arg": "extra"} + url = reverse( + "browse-snapshot-content", url_args={"snapshot_id": snapshot}, query_params=qry + ) + resp = check_html_get_response(client, url, status_code=301) + assert resp.url == reverse( + "browse-content", query_params={**{"snapshot_id": snapshot}, **qry} + ) + + +def test_snapshot_content_legacy_redirect(client, snapshot): + qry = {"extra-arg": "extra"} + url_args = {"snapshot_id": snapshot, "path": "test.txt"} + url = reverse("browse-snapshot-content-legacy", url_args=url_args, query_params=qry) + resp = check_html_get_response(client, url, status_code=301) + assert resp.url == reverse("browse-content", query_params={**url_args, **qry})