diff --git a/swh/web/browse/views/iframe.py b/swh/web/browse/views/iframe.py --- a/swh/web/browse/views/iframe.py +++ b/swh/web/browse/views/iframe.py @@ -4,6 +4,7 @@ # See top-level LICENSE file for more information from typing import Any, Dict, List, Optional, Tuple +from urllib.parse import quote from django.shortcuts import render from django.urls import re_path as url @@ -281,7 +282,9 @@ ) ) - archive_link = reverse("browse-swhid", url_args={"swhid": swhid}) + archive_link = reverse( + "browse-swhid", url_args={"swhid": quote(swhid, safe=":;=/")} + ) if ( parsed_swhid.origin is None and parsed_swhid.visit is None @@ -293,15 +296,17 @@ root_dir_swhid = CoreSWHID( object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(root_dir) ) - archive_swhid = QualifiedSWHID( - object_type=parsed_swhid.object_type, - object_id=parsed_swhid.object_id, - path=parsed_swhid.path, - anchor=root_dir_swhid, + archive_swhid = str( + QualifiedSWHID( + object_type=parsed_swhid.object_type, + object_id=parsed_swhid.object_id, + path=parsed_swhid.path, + anchor=root_dir_swhid, + ) ) archive_link = reverse( "browse-swhid", - url_args={"swhid": f"{archive_swhid}"}, + url_args={"swhid": quote(archive_swhid, safe=":;=/")}, ) except BadInputExc as e: diff --git a/swh/web/utils/identifiers.py b/swh/web/utils/identifiers.py --- a/swh/web/utils/identifiers.py +++ b/swh/web/utils/identifiers.py @@ -361,7 +361,8 @@ object_type, object_id, metadata=swhid_context ) swhid_with_context_url = reverse( - "browse-swhid", url_args={"swhid": swhid_with_context} + "browse-swhid", + url_args={"swhid": quote(swhid_with_context, safe=":;=/")}, ) swhids_info.append( diff --git a/swh/web/utils/tests/test_identifiers.py b/swh/web/utils/tests/test_identifiers.py --- a/swh/web/utils/tests/test_identifiers.py +++ b/swh/web/utils/tests/test_identifiers.py @@ -4,7 +4,7 @@ # See top-level LICENSE file for more information import random -from urllib.parse import quote +from urllib.parse import quote, unquote, urlparse import pytest @@ -458,21 +458,26 @@ extra_context={"path": path}, )[0] - # check special characters in SWHID have been escaped + # check special characters in SWHID have been percent escaped assert ( swhid_info["context"]["origin"] == "http://example.org/?project%3Dabc%3Bdef%25" ) assert swhid_info["context"]["path"] == "/foo%3B/bar%25" # check special characters in SWHID URL have been escaped - parsed_url_swhid = QualifiedSWHID.from_string( - swhid_info["swhid_with_context_url"][1:] + parsed_swhid_url = urlparse(swhid_info["swhid_with_context_url"]) + assert ( + "origin=http://example.org/%253Fproject%25253Dabc%25253Bdef%252525;" + in parsed_swhid_url.path ) + assert "path=/foo%25253B/bar%252525" in parsed_swhid_url.path + + # check that by double unquoting SWHID URL path, we get back on the SWHID value + # first unquoting is done by HTTP server, second unquoting by the SWHID parser + # when processing origin and path qualifiers assert ( - parsed_url_swhid.qualifiers()["origin"] - == "http://example.org/%3Fproject%253Dabc%253Bdef%2525" + unquote(unquote(parsed_swhid_url.path[1:])) == swhid_info["swhid_with_context"] ) - assert parsed_url_swhid.qualifiers()["path"] == "/foo%253B/bar%2525" def test_resolve_swhids_snapshot_context(