diff --git a/cypress/integration/errors.spec.js b/cypress/integration/errors.spec.js --- a/cypress/integration/errors.spec.js +++ b/cypress/integration/errors.spec.js @@ -98,7 +98,7 @@ urlShouldShowError(subDir, { code: '404', msg: 'NotFoundExc: Directory entry with path ' + - origin.invalidSubDir + ' from ' + + origin.invalidSubDir + ' from root directory ' + origin.rootDirectory + ' not found' }); }); diff --git a/swh/web/browse/snapshot_context.py b/swh/web/browse/snapshot_context.py --- a/swh/web/browse/snapshot_context.py +++ b/swh/web/browse/snapshot_context.py @@ -31,7 +31,7 @@ request_content, ) from swh.web.common import archive, highlightjs -from swh.web.common.exc import BadInputExc, NotFoundExc +from swh.web.common.exc import BadInputExc, NotFoundExc, http_status_code_message from swh.web.common.identifiers import get_swhids_info from swh.web.common.origin_visits import get_origin_visit from swh.web.common.typing import ( @@ -701,9 +701,18 @@ root_directory = snapshot_context["root_directory"] sha1_git = root_directory + error_info = { + "status_code": 200, + "description": None, + } if root_directory and path: - dir_info = archive.lookup_directory_with_path(root_directory, path) - sha1_git = dir_info["target"] + try: + dir_info = archive.lookup_directory_with_path(root_directory, path) + sha1_git = dir_info["target"] + except NotFoundExc as e: + sha1_git = None + error_info["status_code"] = 404 + error_info["description"] = f"NotFoundExc: {str(e)}" dirs = [] files = [] @@ -875,7 +884,11 @@ "vault_cooking": vault_cooking, "show_actions": True, "swhids_info": swhids_info, + "error_code": error_info["status_code"], + "error_message": http_status_code_message.get(error_info["status_code"]), + "error_description": error_info["description"], }, + status=error_info["status_code"], ) @@ -915,17 +928,25 @@ split_path = path.split("/") filename = split_path[-1] filepath = path[: -len(filename)] + error_info = { + "status_code": 200, + "description": None, + } if root_directory: - content_info = archive.lookup_directory_with_path(root_directory, path) - sha1_git = content_info["target"] - query_string = "sha1_git:" + sha1_git - content_data = request_content(query_string, raise_if_unavailable=False) - - if filepath: - dir_info = archive.lookup_directory_with_path(root_directory, filepath) - directory_id = dir_info["target"] - else: - directory_id = root_directory + try: + content_info = archive.lookup_directory_with_path(root_directory, path) + sha1_git = content_info["target"] + query_string = "sha1_git:" + sha1_git + content_data = request_content(query_string) + + if filepath: + dir_info = archive.lookup_directory_with_path(root_directory, filepath) + directory_id = dir_info["target"] + else: + directory_id = root_directory + except NotFoundExc as e: + error_info["status_code"] = 404 + error_info["description"] = f"NotFoundExc: {str(e)}" revision_id = snapshot_context["revision_id"] origin_info = snapshot_context["origin_info"] @@ -1055,11 +1076,11 @@ "vault_cooking": None, "show_actions": True, "swhids_info": swhids_info, - "error_code": content_data.get("error_code"), - "error_message": content_data.get("error_message"), - "error_description": content_data.get("error_description"), + "error_code": error_info["status_code"], + "error_message": http_status_code_message.get(error_info["status_code"]), + "error_description": error_info["description"], }, - status=content_data.get("error_code", 200), + status=error_info["status_code"], ) diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -16,7 +16,7 @@ from django.utils.safestring import mark_safe from swh.web.common import archive, highlightjs -from swh.web.common.exc import http_status_code_message +from swh.web.common.exc import NotFoundExc from swh.web.common.utils import ( browsers_supported_image_mimes, format_utc_iso_date, @@ -123,10 +123,7 @@ def request_content( - query_string, - max_size=content_display_max_size, - raise_if_unavailable=True, - re_encode=True, + query_string, max_size=content_display_max_size, re_encode=True, ): """Function that retrieves a content from the archive. @@ -170,27 +167,15 @@ if mimetype.startswith("\\"): filetype = None - content_data["error_code"] = 200 - content_data["error_message"] = "" - content_data["error_description"] = "" - if not max_size or content_data["length"] < max_size: try: content_raw = archive.lookup_content_raw(query_string) except Exception as exc: - if raise_if_unavailable: - raise exc - else: - sentry_sdk.capture_exception(exc) - content_data["raw_data"] = None - content_data["error_code"] = 404 - content_data["error_description"] = ( - "The bytes of the content are currently not available " - "in the archive." - ) - content_data["error_message"] = http_status_code_message[ - content_data["error_code"] - ] + sentry_sdk.capture_exception(exc) + raise NotFoundExc( + "The bytes of the content are currently not available " + "in the archive." + ) else: content_data["raw_data"] = content_raw["data"] diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -24,7 +24,7 @@ request_content, ) from swh.web.common import archive, highlightjs, query -from swh.web.common.exc import NotFoundExc +from swh.web.common.exc import NotFoundExc, http_status_code_message from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import ContentMetadata, SWHObjectInfo from swh.web.common.utils import gen_path_info, reverse, swh_object_icons @@ -186,13 +186,20 @@ """ algo, checksum = query.parse_hash(query_string) checksum = hash_to_hex(checksum) - content_data = request_content(query_string, raise_if_unavailable=False) origin_url = request.GET.get("origin_url") selected_language = request.GET.get("language") if not origin_url: origin_url = request.GET.get("origin") snapshot_id = request.GET.get("snapshot") path = request.GET.get("path") + content_data = {} + error_info = {"status_code": 200, "description": None} + try: + content_data = request_content(query_string) + except NotFoundExc as e: + error_info["status_code"] = 404 + error_info["description"] = f"NotFoundExc: {str(e)}" + snapshot_context = None if origin_url is not None or snapshot_id is not None: try: @@ -225,7 +232,7 @@ content = None language = None mimetype = None - if content_data["raw_data"] is not None: + if content_data.get("raw_data") is not None: content_display_data = prepare_content_for_display( content_data["raw_data"], content_data["mimetype"], path ) @@ -293,12 +300,9 @@ query_params = {"filename": filename} - content_checksums = content_data["checksums"] + content_checksums = content_data.get("checksums", {}) - content_url = reverse( - "browse-content", - url_args={"query_string": f'sha1_git:{content_checksums["sha1_git"]}'}, - ) + content_url = reverse("browse-content", url_args={"query_string": query_string},) content_raw_url = reverse( "browse-content-raw", @@ -308,16 +312,16 @@ content_metadata = ContentMetadata( object_type=CONTENT, - object_id=content_checksums["sha1_git"], - sha1=content_checksums["sha1"], - sha1_git=content_checksums["sha1_git"], - sha256=content_checksums["sha256"], - blake2s256=content_checksums["blake2s256"], + object_id=content_checksums.get("sha1_git"), + sha1=content_checksums.get("sha1"), + sha1_git=content_checksums.get("sha1_git"), + sha256=content_checksums.get("sha256"), + blake2s256=content_checksums.get("blake2s256"), content_url=content_url, - mimetype=content_data["mimetype"], - encoding=content_data["encoding"], - size=filesizeformat(content_data["length"]), - language=content_data["language"], + mimetype=content_data.get("mimetype"), + encoding=content_data.get("encoding"), + size=filesizeformat(content_data.get("length", 0)), + language=content_data.get("language"), root_directory=root_dir, path=f"/{path}" if path else None, filename=filename or "", @@ -330,7 +334,7 @@ ) swh_objects = [ - SWHObjectInfo(object_type=CONTENT, object_id=content_checksums["sha1_git"]) + SWHObjectInfo(object_type=CONTENT, object_id=content_checksums.get("sha1_git")) ] if directory_id: @@ -358,7 +362,7 @@ swh_objects, snapshot_context, extra_context=content_metadata, ) - heading = "Content - %s" % content_checksums["sha1_git"] + heading = "Content - %s" % content_checksums.get("sha1_git") if breadcrumbs: content_path = "/".join([bc["name"] for bc in breadcrumbs]) heading += " - %s" % content_path @@ -372,10 +376,10 @@ "swh_object_name": "Content", "swh_object_metadata": content_metadata, "content": content, - "content_size": content_data["length"], + "content_size": content_data.get("length"), "max_content_size": content_display_max_size, "filename": filename, - "encoding": content_data["encoding"], + "encoding": content_data.get("encoding"), "mimetype": mimetype, "language": language, "available_languages": available_languages, @@ -389,9 +393,9 @@ "vault_cooking": None, "show_actions": True, "swhids_info": swhids_info, - "error_code": content_data["error_code"], - "error_message": content_data["error_message"], - "error_description": content_data["error_description"], + "error_code": error_info["status_code"], + "error_message": http_status_code_message.get(error_info["status_code"]), + "error_description": error_info["description"], }, - status=content_data["error_code"], + status=error_info["status_code"], ) diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -16,7 +16,7 @@ from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import gen_link, get_directory_entries, get_readme_to_display from swh.web.common import archive -from swh.web.common.exc import NotFoundExc +from swh.web.common.exc import NotFoundExc, http_status_code_message from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import DirectoryMetadata, SWHObjectInfo from swh.web.common.utils import gen_path_info, reverse, swh_object_icons @@ -24,11 +24,19 @@ def _directory_browse(request, sha1_git, path=None): root_sha1_git = sha1_git + error_info = {"status_code": 200, "description": None} if path: - dir_info = archive.lookup_directory_with_path(sha1_git, path) - sha1_git = dir_info["target"] + try: + dir_info = archive.lookup_directory_with_path(sha1_git, path) + sha1_git = dir_info["target"] + except NotFoundExc as e: + error_info["status_code"] = 404 + error_info["description"] = f"NotFoundExc: {str(e)}" + sha1_git = None - dirs, files = get_directory_entries(sha1_git) + dirs, files = [], [] + if sha1_git is not None: + dirs, files = get_directory_entries(sha1_git) origin_url = request.GET.get("origin_url") if not origin_url: origin_url = request.GET.get("origin") @@ -211,7 +219,11 @@ "vault_cooking": vault_cooking, "show_actions": True, "swhids_info": swhids_info, + "error_code": error_info["status_code"], + "error_message": http_status_code_message.get(error_info["status_code"]), + "error_description": error_info["description"], }, + status=error_info["status_code"], ) diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -31,7 +31,7 @@ request_content, ) from swh.web.common import archive -from swh.web.common.exc import NotFoundExc +from swh.web.common.exc import NotFoundExc, http_status_code_message from swh.web.common.identifiers import get_swhids_info from swh.web.common.typing import RevisionMetadata, SWHObjectInfo from swh.web.common.utils import ( @@ -316,7 +316,7 @@ snapshot_id = request.GET.get("snapshot") path = request.GET.get("path") dir_id = None - dirs, files = None, None + dirs, files = [], [] content_data = {} if origin_url: try: @@ -328,6 +328,7 @@ branch_name=request.GET.get("branch"), release_name=request.GET.get("release"), revision_id=sha1_git, + path=path, ) except NotFoundExc as e: raw_rev_url = reverse("browse-revision", url_args={"sha1_git": sha1_git}) @@ -349,13 +350,19 @@ elif snapshot_id: snapshot_context = get_snapshot_context(snapshot_id) + error_info = {"status_code": 200, "description": None} + if path: - file_info = archive.lookup_directory_with_path(revision["directory"], path) - if file_info["type"] == "dir": - dir_id = file_info["target"] - else: - query_string = "sha1_git:" + file_info["target"] - content_data = request_content(query_string, raise_if_unavailable=False) + try: + file_info = archive.lookup_directory_with_path(revision["directory"], path) + if file_info["type"] == "dir": + dir_id = file_info["target"] + else: + query_string = "sha1_git:" + file_info["target"] + content_data = request_content(query_string) + except NotFoundExc as e: + error_info["status_code"] = 404 + error_info["description"] = f"NotFoundExc: {str(e)}" else: dir_id = revision["directory"] @@ -449,9 +456,6 @@ readme_url = None readme_html = None readmes = {} - error_code = 200 - error_message = "" - error_description = "" extra_context = dict(revision_metadata) extra_context["path"] = f"/{path}" if path else None @@ -487,10 +491,6 @@ swh_objects.append( SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]) ) - - error_code = content_data["error_code"] - error_message = content_data["error_message"] - error_description = content_data["error_description"] else: for d in dirs: if d["type"] == "rev": @@ -580,9 +580,9 @@ "diff_revision_url": diff_revision_url, "show_actions": True, "swhids_info": swhids_info, - "error_code": error_code, - "error_message": error_message, - "error_description": error_description, + "error_code": error_info["status_code"], + "error_message": http_status_code_message.get(error_info["status_code"]), + "error_description": error_info["description"], }, - status=error_code, + status=error_info["status_code"], ) diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py --- a/swh/web/common/archive.py +++ b/swh/web/common/archive.py @@ -418,15 +418,18 @@ return map(converters.from_directory_entry, directory_entries) -def lookup_directory_with_path(sha1_git, path_string): - """Return directory information for entry with path path_string w.r.t. - root directory pointed by directory_sha1_git +def lookup_directory_with_path(sha1_git: str, path: str) -> Dict[str, Any]: + """Return directory information for entry with specified path w.r.t. + root directory pointed by sha1_git Args: - - directory_sha1_git: sha1_git corresponding to the directory - to which we append paths to (hopefully) find the entry - - the relative path to the entry starting from the directory pointed by - directory_sha1_git + sha1_git: sha1_git corresponding to the directory to which we + append paths to (hopefully) find the entry + path: the relative path to the entry starting from the root + directory pointed by sha1_git + + Returns: + Directory entry information as dict. Raises: NotFoundExc if the directory entry is not found @@ -435,14 +438,14 @@ _check_directory_exists(sha1_git, sha1_git_bin) - paths = path_string.strip(os.path.sep).split(os.path.sep) + paths = path.strip(os.path.sep).split(os.path.sep) queried_dir = storage.directory_entry_get_by_path( - sha1_git_bin, list(map(lambda p: p.encode("utf-8"), paths)) + sha1_git_bin, [p.encode("utf-8") for p in paths] ) if not queried_dir: raise NotFoundExc( - ("Directory entry with path %s from %s not found") % (path_string, sha1_git) + f"Directory entry with path {path} from root directory {sha1_git} not found" ) return converters.from_directory_entry(queried_dir) diff --git a/swh/web/templates/includes/directory-display.html b/swh/web/templates/includes/directory-display.html --- a/swh/web/templates/includes/directory-display.html +++ b/swh/web/templates/includes/directory-display.html @@ -58,6 +58,8 @@ Revision {{ swh_object_metadata.revision }} could not be found in the archive.
Its associated directory can not be displayed. +{% elif error_code != 200 %} + {% include "includes/http-error.html" %} {% elif dirs|length == 0 and files|length == 0 %} Directory is empty {% endif %} diff --git a/swh/web/tests/api/views/test_directory.py b/swh/web/tests/api/views/test_directory.py --- a/swh/web/tests/api/views/test_directory.py +++ b/swh/web/tests/api/views/test_directory.py @@ -58,11 +58,12 @@ path = "some/path/to/nonexistent/dir/" url = reverse("api-1-directory", url_args={"sha1_git": directory, "path": path}) rv = check_api_get_responses(api_client, url, status_code=404) + reason = ( + f"Directory entry with path {path} from root directory {directory} not found" + ) assert rv.data == { "exception": "NotFoundExc", - "reason": ( - "Directory entry with path %s from %s not found" % (path, directory) - ), + "reason": reason, } diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py --- a/swh/web/tests/browse/views/test_content.py +++ b/swh/web/tests/browse/views/test_content.py @@ -316,7 +316,9 @@ check_html_get_response(client, url, status_code=400, template_used="error.html") url = reverse("browse-content", url_args={"query_string": unknown_content["sha1"]}) - check_html_get_response(client, url, status_code=404, template_used="error.html") + check_html_get_response( + client, url, status_code=404, template_used="browse/content.html" + ) @given(content()) diff --git a/swh/web/tests/browse/views/test_directory.py b/swh/web/tests/browse/views/test_directory.py --- a/swh/web/tests/browse/views/test_directory.py +++ b/swh/web/tests/browse/views/test_directory.py @@ -153,6 +153,24 @@ ) +@given(directory()) +def test_directory_with_invalid_path(client, directory): + path = "foo/bar" + dir_url = reverse( + "browse-directory", + url_args={"sha1_git": directory}, + query_params={"path": path}, + ) + + resp = check_html_get_response( + client, dir_url, status_code=404, template_used="browse/directory.html" + ) + error_message = ( + f"Directory entry with path {path} from root directory {directory} not found" + ) + assert_contains(resp, error_message, status_code=404) + + @given(directory()) def test_directory_uppercase(client, directory): url = reverse( diff --git a/swh/web/tests/browse/views/test_origin.py b/swh/web/tests/browse/views/test_origin.py --- a/swh/web/tests/browse/views/test_origin.py +++ b/swh/web/tests/browse/views/test_origin.py @@ -572,7 +572,7 @@ ) resp = check_html_get_response( - client, url, status_code=404, template_used="error.html" + client, url, status_code=404, template_used="browse/directory.html" ) assert re.search("Directory.*not found", resp.content.decode("utf-8")) @@ -653,7 +653,7 @@ ) resp = check_html_get_response( - client, url, status_code=404, template_used="error.html" + client, url, status_code=404, template_used="browse/content.html" ) assert re.search("Directory entry.*not found", resp.content.decode("utf-8")) diff --git a/swh/web/tests/browse/views/test_revision.py b/swh/web/tests/browse/views/test_revision.py --- a/swh/web/tests/browse/views/test_revision.py +++ b/swh/web/tests/browse/views/test_revision.py @@ -291,3 +291,21 @@ swh_dir_id_url = reverse("browse-swhid", url_args={"swhid": swh_dir_id}) assert_contains(resp, swh_dir_id) assert_contains(resp, swh_dir_id_url) + + +@given(revision()) +def test_revision_invalid_path(client, archive_data, revision): + path = "foo/bar" + url = reverse( + "browse-revision", url_args={"sha1_git": revision}, query_params={"path": path} + ) + + resp = check_html_get_response( + client, url, status_code=404, template_used="browse/revision.html" + ) + + directory = archive_data.revision_get(revision)["directory"] + error_message = ( + f"Directory entry with path {path} from root directory {directory} not found" + ) + assert_contains(resp, error_message, status_code=404) diff --git a/swh/web/tests/common/test_archive.py b/swh/web/tests/common/test_archive.py --- a/swh/web/tests/common/test_archive.py +++ b/swh/web/tests/common/test_archive.py @@ -274,7 +274,9 @@ path = "some/invalid/path/here" with pytest.raises(NotFoundExc) as e: archive.lookup_directory_with_path(directory, path) - assert e.match("Directory entry with path %s from %s not found" % (path, directory)) + assert e.match( + f"Directory entry with path {path} from root directory {directory} not found" + ) @given(directory())