Page MenuHomeSoftware Heritage

D6615.id24156.diff
No OneTemporary

D6615.id24156.diff

diff --git a/assets/src/bundles/guided_tour/index.js b/assets/src/bundles/guided_tour/index.js
--- a/assets/src/bundles/guided_tour/index.js
+++ b/assets/src/bundles/guided_tour/index.js
@@ -17,6 +17,8 @@
// we use a origin available both in production and swh-web tests
// environment to ease tour testing
const originUrl = 'https://github.com/memononen/libtess2';
+// sha1 for the content used
+const contentSha1 = 'sha1_git:2d4e23bf1d3f64c1e8b94622178e18d89c653de0';
function openSWHIDsTabBeforeNextStep() {
window.scrollTo(0, 0);
@@ -64,7 +66,7 @@
}
},
{
- url: `${Urls.browse_origin_content()}?origin_url=${originUrl}&path=Example/example.c`,
+ url: `${Urls.browse_content(contentSha1)}?origin_url=${originUrl}`,
introJsOptions: {
steps: guidedTourSteps.browseContent
},
diff --git a/docs/uri-scheme-browse-content.rst b/docs/uri-scheme-browse-content.rst
--- a/docs/uri-scheme-browse-content.rst
+++ b/docs/uri-scheme-browse-content.rst
@@ -89,4 +89,31 @@
:swh_web_browse:`content/sha1:1cb1447c1c7ddc1b03eac88398e40bd914d46b62/raw/`
:swh_web_browse:`content/sha256:8ceb4b9ee5adedde47b31e975c1d90c73ad27b6b165a1dcd80c7c545eb65b903/raw/?filename=COPYING`
-.. _highlightjs: https://highlightjs.org/
\ No newline at end of file
+.. http:get:: /browse/content/
+
+ HTML view that displays a content identified by the query parameters.
+ An origin URL, snapshot or revision must be provided along with a content path
+ as query parameters.
+
+ :query string path: The path of the content relative to the root directory
+ :query string origin_url: optional; specify the origin from which to retrieve the contnet.
+ :query string snapshot: optional; specify the snapshot from which to retrieve the contnet.
+ :query string timestamp: optional; an ISO 8601 datetime string to parse in order to find the closest visit
+ :query string branch: specify the snapshot branch name from which
+ to retrieve the content
+ :query string release: specify the snapshot release name from which
+ to retrieve the content
+
+ :statuscode 200: no error
+ :statuscode 404: path and/or the identifier is missing in the query parameters.
+ :statuscode 404: requested content can not be found in the archive,
+ or the provided content path does not exist from the origin root directory
+
+ **Examples:**
+
+ .. parsed-literal::
+
+ :swh_web_browse:`content/?origin_url=https://github.com/python/cpython&path=.gitignore`
+ :swh_web_browse:`content/?snapshot=673156c31a876c5b99b2fe3e89615529de9a3c44&path=src/opengl/qglbuffer.h`
+
+.. _highlightjs: https://highlightjs.org/
diff --git a/docs/uri-scheme-browse-origin.rst b/docs/uri-scheme-browse-origin.rst
--- a/docs/uri-scheme-browse-origin.rst
+++ b/docs/uri-scheme-browse-origin.rst
@@ -223,6 +223,10 @@
""""""""""""""
.. http:get:: /browse/origin/content/
+ :deprecated:
+
+ .. warning::
+ That endpoint is deprecated, use :http:get:`/browse/content/` instead.
HTML view that produces a display of a content
associated to the latest full visit of a software origin.
@@ -282,7 +286,7 @@
:deprecated:
.. warning::
- That endpoint is deprecated, use :http:get:`/browse/origin/content/` instead.
+ That endpoint is deprecated, use :http:get:`/browse/content/` instead.
HTML view that produces a display of a content
associated to the latest full visit of a software origin.
diff --git a/docs/uri-scheme-browse-snapshot.rst b/docs/uri-scheme-browse-snapshot.rst
--- a/docs/uri-scheme-browse-snapshot.rst
+++ b/docs/uri-scheme-browse-snapshot.rst
@@ -107,6 +107,10 @@
""""""""""""""""
.. http:get:: /browse/snapshot/(snapshot_id)/content/
+ :deprecated:
+
+ .. warning::
+ That endpoint is deprecated, use :http:get:`/browse/content/` instead.
HTML view that produces a display of a content reachable from
a snapshot.
@@ -142,7 +146,7 @@
:deprecated:
.. warning::
- That endpoint is deprecated, use :http:get:`/browse/snapshot/(snapshot_id)/content/` instead.
+ That endpoint is deprecated, use :http:get:`/browse/content/` instead.
HTML view that produces a display of a content reachable from
a snapshot.
diff --git a/swh/web/browse/snapshot_context.py b/swh/web/browse/snapshot_context.py
--- a/swh/web/browse/snapshot_context.py
+++ b/swh/web/browse/snapshot_context.py
@@ -16,9 +16,7 @@
from swh.model.model import Snapshot
from swh.model.swhids import CoreSWHID, ObjectType
from swh.web.browse.utils import (
- content_display_max_size,
format_log_entries,
- gen_content_link,
gen_release_link,
gen_revision_link,
gen_revision_log_link,
@@ -26,15 +24,12 @@
gen_snapshot_link,
get_directory_entries,
get_readme_to_display,
- prepare_content_for_display,
- request_content,
)
-from swh.web.common import archive, highlightjs
+from swh.web.common import archive
from swh.web.common.exc import BadInputExc, NotFoundExc, http_status_code_message
from swh.web.common.identifiers import get_swhids_info
from swh.web.common.origin_visits import get_origin_visit
from swh.web.common.typing import (
- ContentMetadata,
DirectoryMetadata,
OriginInfo,
SnapshotBranchInfo,
@@ -894,203 +889,6 @@
)
-def browse_snapshot_content(
- request,
- snapshot_id=None,
- origin_url=None,
- timestamp=None,
- path=None,
- selected_language=None,
-):
- """
- Django view implementation for browsing a content in a snapshot context.
- """
- _check_origin_url(snapshot_id, origin_url)
-
- if path is None:
- raise BadInputExc("The path of a content must be given as query parameter.")
-
- snapshot_context = get_snapshot_context(
- snapshot_id=snapshot_id,
- origin_url=origin_url,
- timestamp=timestamp,
- visit_id=request.GET.get("visit_id"),
- path=path,
- browse_context="content",
- branch_name=request.GET.get("branch"),
- release_name=request.GET.get("release"),
- revision_id=request.GET.get("revision"),
- )
-
- root_directory = snapshot_context["root_directory"]
- sha1_git = None
- query_string = None
- content_data = {}
- directory_id = None
- split_path = path.split("/")
- filename = split_path[-1]
- filepath = path[: -len(filename)]
- error_info = {
- "status_code": 200,
- "description": None,
- }
- if root_directory:
- try:
- content_info = archive.lookup_directory_with_path(root_directory, path)
- sha1_git = content_info["target"]
- query_string = "sha1_git:" + sha1_git
- content_data = request_content(query_string)
-
- if filepath:
- dir_info = archive.lookup_directory_with_path(root_directory, filepath)
- directory_id = dir_info["target"]
- else:
- directory_id = root_directory
- except NotFoundExc as e:
- error_info["status_code"] = 404
- error_info["description"] = f"NotFoundExc: {str(e)}"
-
- revision_id = snapshot_context["revision_id"]
- origin_info = snapshot_context["origin_info"]
- visit_info = snapshot_context["visit_info"]
- snapshot_id = snapshot_context["snapshot_id"]
-
- if content_data.get("raw_data") is not None:
- content_display_data = prepare_content_for_display(
- content_data["raw_data"], content_data["mimetype"], path
- )
- content_data.update(content_display_data)
-
- # Override language with user-selected language
- if selected_language is not None:
- content_data["language"] = selected_language
-
- available_languages = None
-
- if content_data.get("mimetype") is not None and "text/" in content_data["mimetype"]:
- available_languages = highlightjs.get_supported_languages()
-
- breadcrumbs = _build_breadcrumbs(snapshot_context, filepath)
-
- breadcrumbs.append({"name": filename, "url": None})
-
- browse_content_link = gen_content_link(sha1_git)
-
- content_raw_url = None
- if query_string:
- content_raw_url = reverse(
- "browse-content-raw",
- url_args={"query_string": query_string},
- query_params={"filename": filename},
- )
-
- content_checksums = content_data.get("checksums", {})
-
- sha1_git = content_checksums.get("sha1_git")
-
- swh_objects = []
-
- if sha1_git is not None:
- swh_objects.append(
- SWHObjectInfo(object_type=ObjectType.CONTENT, object_id=sha1_git)
- )
- if directory_id is not None:
- swh_objects.append(
- SWHObjectInfo(object_type=ObjectType.DIRECTORY, object_id=directory_id)
- )
- if revision_id is not None:
- swh_objects.append(
- SWHObjectInfo(object_type=ObjectType.REVISION, object_id=revision_id)
- )
- swh_objects.append(
- SWHObjectInfo(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id)
- )
-
- visit_date = None
- visit_type = None
- if visit_info:
- visit_date = format_utc_iso_date(visit_info["date"])
- visit_type = visit_info["type"]
-
- release_id = snapshot_context["release_id"]
- if release_id:
- swh_objects.append(
- SWHObjectInfo(object_type=ObjectType.RELEASE, object_id=release_id)
- )
-
- content_metadata = ContentMetadata(
- object_type=ObjectType.CONTENT,
- object_id=content_checksums.get("sha1_git"),
- sha1=content_checksums.get("sha1"),
- sha1_git=content_checksums.get("sha1_git"),
- sha256=content_checksums.get("sha256"),
- blake2s256=content_checksums.get("blake2s256"),
- content_url=browse_content_link,
- mimetype=content_data.get("mimetype"),
- encoding=content_data.get("encoding"),
- size=content_data.get("length", 0),
- language=content_data.get("language"),
- root_directory=root_directory,
- path=f"/{filepath}",
- filename=filename,
- directory=directory_id,
- revision=revision_id,
- release=release_id,
- snapshot=snapshot_id,
- origin_url=origin_url,
- visit_date=visit_date,
- visit_type=visit_type,
- )
-
- swhids_info = get_swhids_info(swh_objects, snapshot_context, content_metadata)
-
- content_path = "/".join([bc["name"] for bc in breadcrumbs])
- context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
- if origin_info:
- context_found = "origin: %s" % origin_info["url"]
- heading = "Content - %s - %s - %s" % (
- content_path,
- snapshot_context["branch"],
- context_found,
- )
-
- top_right_link = None
- if not snapshot_context["is_empty"]:
- top_right_link = {
- "url": content_raw_url,
- "icon": swh_object_icons["content"],
- "text": "Raw File",
- }
-
- return render(
- request,
- "browse/content.html",
- {
- "heading": heading,
- "swh_object_name": "Content",
- "swh_object_metadata": content_metadata,
- "content": content_data.get("content_data"),
- "content_size": content_data.get("length"),
- "max_content_size": content_display_max_size,
- "filename": filename,
- "encoding": content_data.get("encoding"),
- "mimetype": content_data.get("mimetype"),
- "language": content_data.get("language"),
- "available_languages": available_languages,
- "breadcrumbs": breadcrumbs if root_directory else [],
- "top_right_link": top_right_link,
- "snapshot_context": snapshot_context,
- "vault_cooking": None,
- "show_actions": True,
- "swhids_info": swhids_info,
- "error_code": error_info["status_code"],
- "error_message": http_status_code_message.get(error_info["status_code"]),
- "error_description": error_info["description"],
- },
- status=error_info["status_code"],
- )
-
-
PER_PAGE = 100
diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py
--- a/swh/web/browse/views/content.py
+++ b/swh/web/browse/views/content.py
@@ -9,7 +9,7 @@
import sentry_sdk
from django.http import HttpResponse, JsonResponse
-from django.shortcuts import render
+from django.shortcuts import redirect, render
from swh.model.hashutil import hash_to_hex
from swh.model.swhids import ObjectType
@@ -22,7 +22,7 @@
request_content,
)
from swh.web.common import archive, highlightjs, query
-from swh.web.common.exc import NotFoundExc, http_status_code_message
+from swh.web.common.exc import BadInputExc, NotFoundExc, http_status_code_message
from swh.web.common.identifiers import get_swhids_info
from swh.web.common.typing import ContentMetadata, SWHObjectInfo
from swh.web.common.utils import gen_path_info, reverse, swh_object_icons
@@ -170,18 +170,54 @@
return JsonResponse(diff_data)
+def _get_content_from_request(request):
+ path = request.GET.get("path")
+ if path is None:
+ raise BadInputExc("Path must be provided as a query parameter.")
+ snapshot = request.GET.get("snapshot")
+ origin_url = request.GET.get("origin_url")
+ if snapshot is None and origin_url is None:
+ raise BadInputExc("Origin or Snapshot must be provided as a query parameter.")
+ snapshot_context = get_snapshot_context(
+ snapshot_id=snapshot,
+ origin_url=origin_url,
+ path=path,
+ timestamp=request.GET.get("timestamp"),
+ visit_id=request.GET.get("visit_id"),
+ branch_name=request.GET.get("branch"),
+ release_name=request.GET.get("release"),
+ browse_context="content",
+ )
+ root_directory = snapshot_context["root_directory"]
+ return archive.lookup_directory_with_path(root_directory, path)
+
+
@browse_route(
r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/",
+ r"content/",
view_name="browse-content",
checksum_args=["query_string"],
)
-def content_display(request, query_string):
+def content_display(request, query_string=None):
"""Django view that produces an HTML display of a content identified
by its hash value.
- The url that points to it is
+ The URLs that points to it are
:http:get:`/browse/content/[(algo_hash):](hash)/`
+ :http:get:`/browse/content/`
"""
+ if query_string is None:
+ # this case happens when redirected either of
+ # origin/content or snapshot/content
+ content = _get_content_from_request(request)
+ return redirect(
+ reverse(
+ "browse-content",
+ url_args={"query_string": f"sha1_git:{content['target']}"},
+ query_params=request.GET,
+ ),
+ )
+
algo, checksum = query.parse_hash(query_string)
checksum = hash_to_hex(checksum)
origin_url = request.GET.get("origin_url")
@@ -204,6 +240,8 @@
snapshot_context = get_snapshot_context(
origin_url=origin_url,
snapshot_id=snapshot_id,
+ timestamp=request.GET.get("timestamp"),
+ visit_id=request.GET.get("visit_id"),
branch_name=request.GET.get("branch"),
release_name=request.GET.get("release"),
revision_id=request.GET.get("revision"),
diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py
--- a/swh/web/browse/views/origin.py
+++ b/swh/web/browse/views/origin.py
@@ -4,27 +4,21 @@
# See top-level LICENSE file for more information
from django.shortcuts import redirect, render
-from django.urls import resolve
from swh.web.browse.browseurls import browse_route
from swh.web.browse.snapshot_context import (
- browse_snapshot_content,
browse_snapshot_directory,
get_snapshot_context,
)
from swh.web.common import archive
from swh.web.common.exc import BadInputExc
from swh.web.common.origin_visits import get_origin_visits
-from swh.web.common.utils import format_utc_iso_date, parse_iso8601_date_to_utc, reverse
-
-
-def redirect_to_new_route(request, new_route):
- request_path = resolve(request.path_info)
- # Send all the url_args and the request_args as query params
- # eg /origin/<url:url-val>/log?path=test
- # will be send as /log?url=<url-val>&path=test
- args = {**request_path.kwargs, **request.GET.dict()}
- return redirect(reverse(new_route, query_params=args), permanent=True,)
+from swh.web.common.utils import (
+ format_utc_iso_date,
+ parse_iso8601_date_to_utc,
+ redirect_to_new_route,
+ reverse,
+)
@browse_route(
@@ -73,20 +67,16 @@
r"origin/content/", view_name="browse-origin-content",
)
def origin_content_browse(request):
- """Django view that produces an HTML display of a content
+ """
+ This route is deprecated; use http:get:`/browse/content` instead
+
+ Django view that produces an HTML display of a content
associated to an origin for a given visit.
The URL that points to it is :http:get:`/browse/origin/content/`
"""
- return browse_snapshot_content(
- request,
- origin_url=request.GET.get("origin_url"),
- snapshot_id=request.GET.get("snapshot"),
- timestamp=request.GET.get("timestamp"),
- path=request.GET.get("path"),
- selected_language=request.GET.get("language"),
- )
+ return redirect_to_new_route(request, "browse-content")
@browse_route(
@@ -96,7 +86,10 @@
view_name="browse-origin-content-legacy",
)
def origin_content_browse_legacy(request, origin_url, path=None, timestamp=None):
- """Django view that produces an HTML display of a content
+ """
+ This route is deprecated; use http:get:`/browse/content` instead
+
+ Django view that produces an HTML display of a content
associated to an origin for a given visit.
The URLs that point to it are
@@ -104,14 +97,7 @@
:http:get:`/browse/origin/(origin_url)/visit/(timestamp)/content/(path)/`
"""
- return browse_snapshot_content(
- request,
- origin_url=origin_url,
- snapshot_id=request.GET.get("snapshot"),
- timestamp=timestamp,
- path=path,
- selected_language=request.GET.get("language"),
- )
+ return redirect_to_new_route(request, "browse-content")
@browse_route(
diff --git a/swh/web/browse/views/snapshot.py b/swh/web/browse/views/snapshot.py
--- a/swh/web/browse/views/snapshot.py
+++ b/swh/web/browse/views/snapshot.py
@@ -9,14 +9,13 @@
from swh.web.browse.browseurls import browse_route
from swh.web.browse.snapshot_context import (
browse_snapshot_branches,
- browse_snapshot_content,
browse_snapshot_directory,
browse_snapshot_log,
browse_snapshot_releases,
get_snapshot_context,
)
from swh.web.common.exc import BadInputExc
-from swh.web.common.utils import reverse
+from swh.web.common.utils import redirect_to_new_route, reverse
def get_snapshot_from_request(request):
@@ -93,17 +92,16 @@
checksum_args=["snapshot_id"],
)
def snapshot_content_browse(request, snapshot_id):
- """Django view that produces an HTML display of a content
+ """
+ This route is deprecated; use http:get:`/browse/content` instead
+
+ Django view that produces an HTML display of a content
collected in a snapshot.
The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/content/`
"""
- return browse_snapshot_content(
- request,
- snapshot_id=snapshot_id,
- path=request.GET.get("path"),
- selected_language=request.GET.get("language"),
- )
+
+ return redirect_to_new_route(request, "browse-content")
@browse_route(
@@ -112,18 +110,16 @@
checksum_args=["snapshot_id"],
)
def snapshot_content_browse_legacy(request, snapshot_id, path):
- """Django view that produces an HTML display of a content
+ """
+ This route is deprecated; use http:get:`/browse/content` instead
+
+ Django view that produces an HTML display of a content
collected in a snapshot.
The url that points to it is
:http:get:`/browse/snapshot/(snapshot_id)/content/(path)/`
"""
- return browse_snapshot_content(
- request,
- snapshot_id=snapshot_id,
- path=path,
- selected_language=request.GET.get("language"),
- )
+ return redirect_to_new_route(request, "browse-content")
@browse_route(
diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py
--- a/swh/web/common/utils.py
+++ b/swh/web/common/utils.py
@@ -22,6 +22,8 @@
from django.core.cache import cache
from django.http import HttpRequest, QueryDict
+from django.shortcuts import redirect
+from django.urls import resolve
from django.urls import reverse as django_reverse
from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION
@@ -415,3 +417,15 @@
return sorted(search().visit_types_count().keys())
except Exception:
return []
+
+
+def redirect_to_new_route(request, new_route, permanent=True):
+ """Redirect a request to another route
+ with urls args and query parameters
+ eg: /origin/<url:url-val>/log?path=test
+ can be redirected as /log?url=<url-val>&path=test
+ This can be used to deprecate routes
+ """
+ request_path = resolve(request.path_info)
+ args = {**request_path.kwargs, **request.GET.dict()}
+ return redirect(reverse(new_route, query_params=args), permanent=permanent,)
diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py
--- a/swh/web/tests/browse/views/test_content.py
+++ b/swh/web/tests/browse/views/test_content.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
import random
+import re
import pytest
@@ -18,7 +19,13 @@
)
from swh.web.common.exc import NotFoundExc
from swh.web.common.identifiers import gen_swhid
-from swh.web.common.utils import gen_path_info, reverse
+from swh.web.common.utils import (
+ format_utc_iso_date,
+ gen_path_info,
+ parse_iso8601_date_to_utc,
+ reverse,
+)
+from swh.web.tests.data import get_content
from swh.web.tests.django_asserts import assert_contains, assert_not_contains
from swh.web.tests.utils import check_html_get_response, check_http_get_response
@@ -631,3 +638,386 @@
assert type(content_display["content_data"]) == str
return content_display
+
+
+def test_content_dispaly_empty_query_string_missing_path(client):
+ url = reverse("browse-content", query_params={"origin_url": "http://example.com"},)
+ resp = check_html_get_response(
+ client, url, status_code=400, template_used="error.html"
+ )
+ assert_contains(
+ resp, "Path must be provided as a query parameter.", status_code=400
+ )
+
+
+def test_content_dispaly_empty_query_string_and_snapshot_origin(client):
+ url = reverse("browse-content", query_params={"path": "test.txt"},)
+ resp = check_html_get_response(client, url, status_code=400,)
+ assert_contains(
+ resp,
+ "Origin or Snapshot must be provided as a query parameter.",
+ status_code=400,
+ )
+
+
+def test_content_dispaly_empty_query_string_with_origin(
+ client, archive_data, origin_with_multiple_visits
+):
+ origin_url = origin_with_multiple_visits["url"]
+ snapshot = archive_data.snapshot_get_latest(origin_url)
+ head_rev_id = archive_data.snapshot_get_head(snapshot)
+ head_rev = archive_data.revision_get(head_rev_id)
+ dir_content = archive_data.directory_ls(head_rev["directory"])
+ dir_files = [e for e in dir_content if e["type"] == "file"]
+ dir_file = random.choice(dir_files)
+
+ url = reverse(
+ "browse-content",
+ query_params={"origin_url": origin_url, "path": dir_file["name"],},
+ )
+
+ resp = check_html_get_response(client, url, status_code=302,)
+ redict_url = reverse(
+ "browse-content",
+ url_args={"query_string": f"sha1_git:{dir_file['checksums']['sha1_git']}"},
+ query_params={"origin_url": origin_url, "path": dir_file["name"],},
+ )
+ assert resp.url == redict_url
+
+
+def test_content_dispaly_empty_query_string_with_snapshot(
+ client, archive_data, origin_with_multiple_visits
+):
+ origin_url = origin_with_multiple_visits["url"]
+ snapshot = archive_data.snapshot_get_latest(origin_url)
+ head_rev_id = archive_data.snapshot_get_head(snapshot)
+ head_rev = archive_data.revision_get(head_rev_id)
+ dir_content = archive_data.directory_ls(head_rev["directory"])
+ dir_files = [e for e in dir_content if e["type"] == "file"]
+ dir_file = random.choice(dir_files)
+ url = reverse(
+ "browse-content",
+ query_params={"snapshot": snapshot["id"], "path": dir_file["name"],},
+ )
+
+ resp = check_html_get_response(client, url, status_code=302,)
+ redict_url = reverse(
+ "browse-content",
+ url_args={"query_string": f"sha1_git:{dir_file['checksums']['sha1_git']}"},
+ query_params={"snapshot": snapshot["id"], "path": dir_file["name"],},
+ )
+ assert resp.url == redict_url
+
+
+def test_browse_origin_content_no_visit(client, mocker, origin):
+ mock_get_origin_visits = mocker.patch(
+ "swh.web.common.origin_visits.get_origin_visits"
+ )
+ mock_get_origin_visits.return_value = []
+ mock_archive = mocker.patch("swh.web.common.origin_visits.archive")
+ mock_archive.lookup_origin_visit_latest.return_value = None
+ url = reverse(
+ "browse-content", query_params={"origin_url": origin["url"], "path": "foo"},
+ )
+
+ resp = check_html_get_response(
+ client, url, status_code=404, template_used="error.html"
+ )
+ assert_contains(resp, "No valid visit", status_code=404)
+ assert not mock_get_origin_visits.called
+
+
+def test_browse_origin_content_unknown_visit(client, mocker, origin):
+ mock_get_origin_visits = mocker.patch(
+ "swh.web.common.origin_visits.get_origin_visits"
+ )
+ mock_get_origin_visits.return_value = [{"visit": 1}]
+
+ url = reverse(
+ "browse-content",
+ query_params={"origin_url": origin["url"], "path": "foo", "visit_id": 2},
+ )
+
+ resp = check_html_get_response(
+ client, url, status_code=404, template_used="error.html"
+ )
+ assert re.search("Resource not found", resp.content.decode("utf-8"))
+
+
+def test_browse_origin_content_not_found(client, origin):
+ url = reverse(
+ "browse-content",
+ query_params={"origin_url": origin["url"], "path": "/invalid/file/path"},
+ )
+
+ resp = check_html_get_response(
+ client, url, status_code=404, template_used="error.html"
+ )
+ assert re.search("Resource not found", resp.content.decode("utf-8"))
+
+
+def test_browse_content_invalid_origin(client):
+ url = reverse(
+ "browse-content",
+ query_params={
+ "origin_url": "http://invalid-origin",
+ "path": "/invalid/file/path",
+ },
+ )
+
+ resp = check_html_get_response(
+ client, url, status_code=404, template_used="error.html"
+ )
+ assert re.search("Resource not found", resp.content.decode("utf-8"))
+
+
+def test_origin_content_view(
+ client, archive_data, swh_scheduler, origin_with_multiple_visits
+):
+ origin_visits = archive_data.origin_visit_get(origin_with_multiple_visits["url"])
+
+ def _get_archive_data(visit_idx):
+ snapshot = archive_data.snapshot_get(origin_visits[visit_idx]["snapshot"])
+ head_rev_id = archive_data.snapshot_get_head(snapshot)
+ head_rev = archive_data.revision_get(head_rev_id)
+ dir_content = archive_data.directory_ls(head_rev["directory"])
+ dir_files = [e for e in dir_content if e["type"] == "file"]
+ dir_file = random.choice(dir_files)
+ branches, releases, _ = process_snapshot_branches(snapshot)
+ return {
+ "branches": branches,
+ "releases": releases,
+ "root_dir_sha1": head_rev["directory"],
+ "content": get_content(dir_file["checksums"]["sha1"]),
+ "visit": origin_visits[visit_idx],
+ "snapshot_sizes": archive_data.snapshot_count_branches(snapshot["id"]),
+ }
+
+ tdata = _get_archive_data(-1)
+
+ _origin_content_view_test_helper(
+ client,
+ archive_data,
+ origin_with_multiple_visits,
+ origin_visits[-1],
+ tdata["snapshot_sizes"],
+ tdata["branches"],
+ tdata["releases"],
+ tdata["root_dir_sha1"],
+ tdata["content"],
+ )
+
+ _origin_content_view_test_helper(
+ client,
+ archive_data,
+ origin_with_multiple_visits,
+ origin_visits[-1],
+ tdata["snapshot_sizes"],
+ tdata["branches"],
+ tdata["releases"],
+ tdata["root_dir_sha1"],
+ tdata["content"],
+ timestamp=tdata["visit"]["date"],
+ )
+
+ _origin_content_view_test_helper(
+ client,
+ archive_data,
+ origin_with_multiple_visits,
+ origin_visits[-1],
+ tdata["snapshot_sizes"],
+ tdata["branches"],
+ tdata["releases"],
+ tdata["root_dir_sha1"],
+ tdata["content"],
+ snapshot_id=tdata["visit"]["snapshot"],
+ )
+
+ tdata = _get_archive_data(0)
+
+ _origin_content_view_test_helper(
+ client,
+ archive_data,
+ origin_with_multiple_visits,
+ origin_visits[0],
+ tdata["snapshot_sizes"],
+ tdata["branches"],
+ tdata["releases"],
+ tdata["root_dir_sha1"],
+ tdata["content"],
+ visit_id=tdata["visit"]["visit"],
+ )
+
+ _origin_content_view_test_helper(
+ client,
+ archive_data,
+ origin_with_multiple_visits,
+ origin_visits[0],
+ tdata["snapshot_sizes"],
+ tdata["branches"],
+ tdata["releases"],
+ tdata["root_dir_sha1"],
+ tdata["content"],
+ snapshot_id=tdata["visit"]["snapshot"],
+ )
+
+
+def _origin_content_view_test_helper(
+ client,
+ archive_data,
+ origin_info,
+ origin_visit,
+ snapshot_sizes,
+ origin_branches,
+ origin_releases,
+ root_dir_sha1,
+ content,
+ visit_id=None,
+ timestamp=None,
+ snapshot_id=None,
+):
+ content_path = "/".join(content["path"].split("/")[1:])
+
+ if not visit_id and not snapshot_id:
+ visit_id = origin_visit["visit"]
+
+ query_params = {"origin_url": origin_info["url"], "path": content_path}
+
+ if timestamp:
+ query_params["timestamp"] = timestamp
+
+ if visit_id:
+ query_params["visit_id"] = visit_id
+ elif snapshot_id:
+ query_params["snapshot"] = snapshot_id
+
+ url = reverse(
+ "browse-content",
+ url_args={"query_string": f"sha1_git:{content['sha1_git']}"},
+ query_params=query_params,
+ )
+
+ resp = check_html_get_response(
+ client, url, status_code=200, template_used="browse/content.html"
+ )
+
+ assert type(content["data"]) == str
+
+ assert_contains(resp, '<code class="%s">' % content["hljs_language"])
+ assert_contains(resp, escape(content["data"]))
+
+ split_path = content_path.split("/")
+
+ filename = split_path[-1]
+ path = content_path.replace(filename, "")[:-1]
+
+ path_info = gen_path_info(path)
+
+ del query_params["path"]
+
+ if timestamp:
+ query_params["timestamp"] = format_utc_iso_date(
+ parse_iso8601_date_to_utc(timestamp).isoformat(), "%Y-%m-%dT%H:%M:%SZ"
+ )
+
+ root_dir_url = reverse(
+ "browse-directory",
+ url_args={"sha1_git": root_dir_sha1},
+ query_params=query_params,
+ )
+
+ assert_contains(resp, '<li class="swh-path">', count=len(path_info) + 1)
+
+ assert_contains(resp, '<a href="%s">%s</a>' % (root_dir_url, root_dir_sha1[:7]))
+
+ for p in path_info:
+ query_params["path"] = p["path"]
+ dir_url = reverse("browse-origin-directory", query_params=query_params)
+ assert_contains(resp, '<a href="%s">%s</a>' % (dir_url, p["name"]))
+
+ assert_contains(resp, "<li>%s</li>" % filename)
+
+ query_string = "sha1_git:" + content["sha1_git"]
+
+ url_raw = reverse(
+ "browse-content-raw",
+ url_args={"query_string": query_string},
+ query_params={"filename": filename},
+ )
+ assert_contains(resp, url_raw)
+
+ if "path" in query_params:
+ del query_params["path"]
+
+ origin_branches_url = reverse("browse-origin-branches", query_params=query_params)
+
+ assert_contains(resp, f'href="{escape(origin_branches_url)}"')
+ assert_contains(resp, f"Branches ({snapshot_sizes['revision']})")
+
+ origin_releases_url = reverse("browse-origin-releases", query_params=query_params)
+
+ assert_contains(resp, f'href="{escape(origin_releases_url)}">')
+ assert_contains(resp, f"Releases ({snapshot_sizes['release']})")
+
+ assert_contains(resp, '<li class="swh-branch">', count=len(origin_branches))
+
+ query_params["path"] = content_path
+
+ for branch in origin_branches:
+ root_dir_branch_url = reverse(
+ "browse-origin-content",
+ query_params={"branch": branch["name"], **query_params},
+ )
+
+ assert_contains(resp, '<a href="%s">' % root_dir_branch_url)
+
+ assert_contains(resp, '<li class="swh-release">', count=len(origin_releases))
+
+ query_params["branch"] = None
+ for release in origin_releases:
+ root_dir_release_url = reverse(
+ "browse-origin-content",
+ query_params={"release": release["name"], **query_params},
+ )
+
+ assert_contains(resp, '<a href="%s">' % root_dir_release_url)
+
+ url = reverse(
+ "browse-content",
+ url_args={"query_string": query_string},
+ query_params=query_params,
+ )
+
+ resp = check_html_get_response(
+ client, url, status_code=200, template_used="browse/content.html"
+ )
+
+ snapshot = archive_data.snapshot_get(origin_visit["snapshot"])
+ head_rev_id = archive_data.snapshot_get_head(snapshot)
+
+ swhid_context = {
+ "origin": origin_info["url"],
+ "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
+ "anchor": gen_swhid(ObjectType.REVISION, head_rev_id),
+ "path": f"/{content_path}",
+ }
+
+ swh_cnt_id = gen_swhid(
+ ObjectType.CONTENT, content["sha1_git"], metadata=swhid_context
+ )
+ swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
+ assert_contains(resp, swh_cnt_id)
+ assert_contains(resp, swh_cnt_id_url)
+
+ assert_contains(resp, "swh-take-new-snapshot")
+
+ _check_origin_link(resp, origin_info["url"])
+
+ assert_not_contains(resp, "swh-metadata-popover")
+
+
+def _check_origin_link(resp, origin_url):
+ browse_origin_url = reverse(
+ "browse-origin", query_params={"origin_url": origin_url}
+ )
+ assert_contains(resp, f'href="{browse_origin_url}"')
diff --git a/swh/web/tests/browse/views/test_origin.py b/swh/web/tests/browse/views/test_origin.py
--- a/swh/web/tests/browse/views/test_origin.py
+++ b/swh/web/tests/browse/views/test_origin.py
@@ -24,13 +24,7 @@
from swh.web.browse.snapshot_context import process_snapshot_branches
from swh.web.common.exc import NotFoundExc
from swh.web.common.identifiers import gen_swhid
-from swh.web.common.utils import (
- format_utc_iso_date,
- gen_path_info,
- parse_iso8601_date_to_utc,
- reverse,
-)
-from swh.web.tests.data import get_content
+from swh.web.common.utils import format_utc_iso_date, parse_iso8601_date_to_utc, reverse
from swh.web.tests.django_asserts import assert_contains, assert_not_contains
from swh.web.tests.strategies import new_origin, new_snapshot, visit_dates
from swh.web.tests.utils import check_html_get_response
@@ -57,97 +51,6 @@
_check_origin_link(resp, origin_url)
-def test_origin_content_view(
- client, archive_data, swh_scheduler, origin_with_multiple_visits
-):
- origin_visits = archive_data.origin_visit_get(origin_with_multiple_visits["url"])
-
- def _get_archive_data(visit_idx):
- snapshot = archive_data.snapshot_get(origin_visits[visit_idx]["snapshot"])
- head_rev_id = archive_data.snapshot_get_head(snapshot)
- head_rev = archive_data.revision_get(head_rev_id)
- dir_content = archive_data.directory_ls(head_rev["directory"])
- dir_files = [e for e in dir_content if e["type"] == "file"]
- dir_file = random.choice(dir_files)
- branches, releases, _ = process_snapshot_branches(snapshot)
- return {
- "branches": branches,
- "releases": releases,
- "root_dir_sha1": head_rev["directory"],
- "content": get_content(dir_file["checksums"]["sha1"]),
- "visit": origin_visits[visit_idx],
- "snapshot_sizes": archive_data.snapshot_count_branches(snapshot["id"]),
- }
-
- tdata = _get_archive_data(-1)
-
- _origin_content_view_test_helper(
- client,
- archive_data,
- origin_with_multiple_visits,
- origin_visits[-1],
- tdata["snapshot_sizes"],
- tdata["branches"],
- tdata["releases"],
- tdata["root_dir_sha1"],
- tdata["content"],
- )
-
- _origin_content_view_test_helper(
- client,
- archive_data,
- origin_with_multiple_visits,
- origin_visits[-1],
- tdata["snapshot_sizes"],
- tdata["branches"],
- tdata["releases"],
- tdata["root_dir_sha1"],
- tdata["content"],
- timestamp=tdata["visit"]["date"],
- )
-
- _origin_content_view_test_helper(
- client,
- archive_data,
- origin_with_multiple_visits,
- origin_visits[-1],
- tdata["snapshot_sizes"],
- tdata["branches"],
- tdata["releases"],
- tdata["root_dir_sha1"],
- tdata["content"],
- snapshot_id=tdata["visit"]["snapshot"],
- )
-
- tdata = _get_archive_data(0)
-
- _origin_content_view_test_helper(
- client,
- archive_data,
- origin_with_multiple_visits,
- origin_visits[0],
- tdata["snapshot_sizes"],
- tdata["branches"],
- tdata["releases"],
- tdata["root_dir_sha1"],
- tdata["content"],
- visit_id=tdata["visit"]["visit"],
- )
-
- _origin_content_view_test_helper(
- client,
- archive_data,
- origin_with_multiple_visits,
- origin_visits[0],
- tdata["snapshot_sizes"],
- tdata["branches"],
- tdata["releases"],
- tdata["root_dir_sha1"],
- tdata["content"],
- snapshot_id=tdata["visit"]["snapshot"],
- )
-
-
def test_origin_root_directory_view(client, archive_data, swh_scheduler, origin):
origin_visits = archive_data.origin_visit_get(origin["url"])
@@ -529,43 +432,6 @@
assert re.search("Directory.*not found", resp.content.decode("utf-8"))
-def test_browse_origin_content_no_visit(client, mocker, origin):
- mock_get_origin_visits = mocker.patch(
- "swh.web.common.origin_visits.get_origin_visits"
- )
- mock_get_origin_visits.return_value = []
- mock_archive = mocker.patch("swh.web.common.origin_visits.archive")
- mock_archive.lookup_origin_visit_latest.return_value = None
- url = reverse(
- "browse-origin-content",
- query_params={"origin_url": origin["url"], "path": "foo"},
- )
-
- resp = check_html_get_response(
- client, url, status_code=404, template_used="error.html"
- )
- assert_contains(resp, "No valid visit", status_code=404)
- assert not mock_get_origin_visits.called
-
-
-def test_browse_origin_content_unknown_visit(client, mocker, origin):
- mock_get_origin_visits = mocker.patch(
- "swh.web.common.origin_visits.get_origin_visits"
- )
- mock_get_origin_visits.return_value = [{"visit": 1}]
-
- url = reverse(
- "browse-origin-content",
- query_params={"origin_url": origin["url"], "path": "foo", "visit_id": 2},
- )
-
- resp = check_html_get_response(
- client, url, status_code=404, template_used="error.html"
- )
- assert re.search("Visit.*not found", resp.content.decode("utf-8"))
- assert mock_get_origin_visits.called
-
-
def _add_empty_snapshot_origin(new_origin, archive_data):
snapshot = Snapshot(branches={})
archive_data.origin_add([new_origin])
@@ -584,7 +450,7 @@
@pytest.mark.django_db
-@pytest.mark.parametrize("object_type", ["content", "directory"])
+@pytest.mark.parametrize("object_type", ["directory"])
@given(new_origin())
def test_browse_origin_content_directory_empty_snapshot(
client, staff_user, archive_data, object_type, new_origin
@@ -606,18 +472,6 @@
assert re.search("snapshot.*is empty", resp.content.decode("utf-8"))
-def test_browse_origin_content_not_found(client, origin):
- url = reverse(
- "browse-origin-content",
- query_params={"origin_url": origin["url"], "path": "/invalid/file/path"},
- )
-
- resp = check_html_get_response(
- client, url, status_code=404, template_used="browse/content.html"
- )
- assert re.search("Directory entry.*not found", resp.content.decode("utf-8"))
-
-
def test_browse_directory_snapshot_not_found(client, mocker, origin):
mock_get_snapshot_context = mocker.patch(
"swh.web.browse.snapshot_context.get_snapshot_context"
@@ -786,20 +640,8 @@
assert_not_contains(resp, "swh:1:rev:")
-def test_origin_content_no_path(client, origin):
- url = reverse("browse-origin-content", query_params={"origin_url": origin["url"]})
-
- resp = check_html_get_response(
- client, url, status_code=400, template_used="error.html"
- )
- assert_contains(
- resp, "The path of a content must be given as query parameter.", status_code=400
- )
-
-
def test_origin_views_no_url_query_parameter(client):
for browse_context in (
- "content",
"directory",
"visits",
):
@@ -824,6 +666,18 @@
)
+@given(new_origin())
+@pytest.mark.parametrize("browse_context", ["content"])
+def test_origin_content_view_redirects(client, browse_context, new_origin):
+ query_params = {"origin_url": new_origin.url, "path": "test.txt"}
+ url = reverse(f"browse-origin-{browse_context}", query_params=query_params)
+
+ resp = check_html_get_response(client, url, status_code=301)
+ assert resp["location"] == reverse(
+ f"browse-{browse_context}", query_params=query_params
+ )
+
+
@given(new_origin())
@pytest.mark.parametrize("browse_context", ["log", "branches", "releases"])
def test_origin_view_legacy_redirects(client, browse_context, new_origin):
@@ -846,147 +700,28 @@
)
-def _origin_content_view_test_helper(
- client,
- archive_data,
- origin_info,
- origin_visit,
- snapshot_sizes,
- origin_branches,
- origin_releases,
- root_dir_sha1,
- content,
- visit_id=None,
- timestamp=None,
- snapshot_id=None,
-):
- content_path = "/".join(content["path"].split("/")[1:])
-
- if not visit_id and not snapshot_id:
- visit_id = origin_visit["visit"]
-
- query_params = {"origin_url": origin_info["url"], "path": content_path}
-
- if timestamp:
- query_params["timestamp"] = timestamp
-
- if visit_id:
- query_params["visit_id"] = visit_id
- elif snapshot_id:
- query_params["snapshot"] = snapshot_id
-
- url = reverse("browse-origin-content", query_params=query_params)
-
- resp = check_html_get_response(
- client, url, status_code=200, template_used="browse/content.html"
- )
-
- assert type(content["data"]) == str
-
- assert_contains(resp, '<code class="%s">' % content["hljs_language"])
- assert_contains(resp, escape(content["data"]))
-
- split_path = content_path.split("/")
-
- filename = split_path[-1]
- path = content_path.replace(filename, "")[:-1]
-
- path_info = gen_path_info(path)
-
- del query_params["path"]
-
- if timestamp:
- query_params["timestamp"] = format_utc_iso_date(
- parse_iso8601_date_to_utc(timestamp).isoformat(), "%Y-%m-%dT%H:%M:%SZ"
- )
-
- root_dir_url = reverse("browse-origin-directory", query_params=query_params)
-
- assert_contains(resp, '<li class="swh-path">', count=len(path_info) + 1)
-
- assert_contains(resp, '<a href="%s">%s</a>' % (root_dir_url, root_dir_sha1[:7]))
-
- for p in path_info:
- query_params["path"] = p["path"]
- dir_url = reverse("browse-origin-directory", query_params=query_params)
- assert_contains(resp, '<a href="%s">%s</a>' % (dir_url, p["name"]))
-
- assert_contains(resp, "<li>%s</li>" % filename)
-
- query_string = "sha1_git:" + content["sha1_git"]
-
- url_raw = reverse(
- "browse-content-raw",
- url_args={"query_string": query_string},
- query_params={"filename": filename},
- )
- assert_contains(resp, url_raw)
-
- if "path" in query_params:
- del query_params["path"]
-
- origin_branches_url = reverse("browse-origin-branches", query_params=query_params)
-
- assert_contains(resp, f'href="{escape(origin_branches_url)}"')
- assert_contains(resp, f"Branches ({snapshot_sizes['revision']})")
-
- origin_releases_url = reverse("browse-origin-releases", query_params=query_params)
-
- assert_contains(resp, f'href="{escape(origin_releases_url)}">')
- assert_contains(resp, f"Releases ({snapshot_sizes['release']})")
-
- assert_contains(resp, '<li class="swh-branch">', count=len(origin_branches))
-
- query_params["path"] = content_path
-
- for branch in origin_branches:
- root_dir_branch_url = reverse(
- "browse-origin-content",
- query_params={"branch": branch["name"], **query_params},
+@given(new_origin())
+def test_origin_content_view_legacy_redirects(client, new_origin):
+ url_args = [
+ {"origin_url": new_origin.url},
+ {
+ "origin_url": new_origin.url,
+ "path": "test.txt",
+ "timestamp": "2021-01-23T22:24:10Z",
+ },
+ {"origin_url": new_origin.url, "path": "test.txt"},
+ ]
+ params = {"extra-param1": "extra-param1", "extra-param2": "extra-param2"}
+ for each_arg in url_args:
+ url = reverse(
+ "browse-origin-content-legacy", url_args=each_arg, query_params=params,
)
- assert_contains(resp, '<a href="%s">' % root_dir_branch_url)
-
- assert_contains(resp, '<li class="swh-release">', count=len(origin_releases))
-
- query_params["branch"] = None
- for release in origin_releases:
- root_dir_release_url = reverse(
- "browse-origin-content",
- query_params={"release": release["name"], **query_params},
+ resp = check_html_get_response(client, url, status_code=301)
+ assert resp["location"] == reverse(
+ "browse-content", query_params={**each_arg, **params}
)
- assert_contains(resp, '<a href="%s">' % root_dir_release_url)
-
- url = reverse("browse-origin-content", query_params=query_params)
-
- resp = check_html_get_response(
- client, url, status_code=200, template_used="browse/content.html"
- )
-
- snapshot = archive_data.snapshot_get(origin_visit["snapshot"])
- head_rev_id = archive_data.snapshot_get_head(snapshot)
-
- swhid_context = {
- "origin": origin_info["url"],
- "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
- "anchor": gen_swhid(ObjectType.REVISION, head_rev_id),
- "path": f"/{content_path}",
- }
-
- swh_cnt_id = gen_swhid(
- ObjectType.CONTENT, content["sha1_git"], metadata=swhid_context
- )
- swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
- assert_contains(resp, swh_cnt_id)
- assert_contains(resp, swh_cnt_id_url)
-
- assert_contains(resp, "swh-take-new-snapshot")
-
- _check_origin_link(resp, origin_info["url"])
-
- assert_not_contains(resp, "swh-metadata-popover")
-
def _origin_directory_view_test_helper(
client,
diff --git a/swh/web/tests/browse/views/test_snapshot.py b/swh/web/tests/browse/views/test_snapshot.py
--- a/swh/web/tests/browse/views/test_snapshot.py
+++ b/swh/web/tests/browse/views/test_snapshot.py
@@ -310,3 +310,22 @@
assert_contains(resp, '<a href="%s">' % escape(browse_revision_url))
_check_origin_link(resp, origin_info["url"])
+
+
+def test_snapshot_content_redirect(client, snapshot):
+ qry = {"extra-arg": "extra"}
+ url = reverse(
+ "browse-snapshot-content", url_args={"snapshot_id": snapshot}, query_params=qry
+ )
+ resp = check_html_get_response(client, url, status_code=301)
+ assert resp.url == reverse(
+ "browse-content", query_params={**{"snapshot_id": snapshot}, **qry}
+ )
+
+
+def test_snapshot_content_legacy_redirect(client, snapshot):
+ qry = {"extra-arg": "extra"}
+ url_args = {"snapshot_id": snapshot, "path": "test.txt"}
+ url = reverse("browse-snapshot-content-legacy", url_args=url_args, query_params=qry)
+ resp = check_html_get_response(client, url, status_code=301)
+ assert resp.url == reverse("browse-content", query_params={**url_args, **qry})

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:19 PM (6 d, 8 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228934

Event Timeline