diff --git a/swh/web/assets/src/bundles/browse/swhid-utils.js b/swh/web/assets/src/bundles/browse/swhid-utils.js --- a/swh/web/assets/src/bundles/browse/swhid-utils.js +++ b/swh/web/assets/src/bundles/browse/swhid-utils.js @@ -20,17 +20,19 @@ event.stopPropagation(); let swhIdElt = $(event.target).closest('.swhid-ui').find('.swhid'); let swhIdWithContext = $(event.target).data('swhid-with-context'); + let swhIdWithContextUrl = $(event.target).data('swhid-with-context-url'); let currentSwhId = swhIdElt.text(); if ($(event.target).prop('checked')) { currentSwhId = swhIdWithContext; + swhIdElt.attr('href', swhIdWithContextUrl); } else { const pos = currentSwhId.indexOf(';'); if (pos !== -1) { currentSwhId = currentSwhId.slice(0, pos); } + swhIdElt.attr('href', '/' + currentSwhId + '/'); } swhIdElt.text(currentSwhId); - swhIdElt.attr('href', '/' + currentSwhId + '/'); addLinesInfo(); } @@ -74,8 +76,8 @@ new ClipboardJS('.btn-swhid-url-copy', { text: trigger => { - let swhId = $(trigger).closest('.swhid-ui').find('.swhid').text(); - return window.location.origin + '/' + swhId + '/'; + let swhIdUrl = $(trigger).closest('.swhid-ui').find('.swhid').attr('href'); + return window.location.origin + swhIdUrl; } }); diff --git a/swh/web/assets/src/bundles/webapp/badges.js b/swh/web/assets/src/bundles/webapp/badges.js --- a/swh/web/assets/src/bundles/webapp/badges.js +++ b/swh/web/assets/src/bundles/webapp/badges.js @@ -14,18 +14,20 @@ } else { const pos = objectSWHID.indexOf(';'); if (pos !== -1) { - badgeImageUrl = Urls.swh_badge_swhid(objectSWHID.slice(0, pos)); + const objectSWHIDNoContext = objectSWHID.slice(0, pos); + badgeImageUrl = Urls.swh_badge_swhid(objectSWHIDNoContext); + $('.swhid').each((i, swhid) => { + if (swhid.id === objectSWHIDNoContext) { + badgeLinkUrl = swhid.pathname; + } + }); } else { badgeImageUrl = Urls.swh_badge_swhid(objectSWHID); + badgeLinkUrl = Urls.browse_swhid(objectSWHID); } - badgeLinkUrl = Urls.browse_swhid(objectSWHID); } - let urlPrefix = `${window.location.protocol}//${window.location.hostname}`; - if (window.location.port) { - urlPrefix += `:${window.location.port}`; - } - const absoluteBadgeImageUrl = `${urlPrefix}${badgeImageUrl}`; - const absoluteBadgeLinkUrl = `${urlPrefix}${badgeLinkUrl}`; + const absoluteBadgeImageUrl = `${window.location.origin}${badgeImageUrl}`; + const absoluteBadgeLinkUrl = `${window.location.origin}${badgeLinkUrl}`; const html = ` diff --git a/swh/web/common/identifiers.py b/swh/web/common/identifiers.py --- a/swh/web/common/identifiers.py +++ b/swh/web/common/identifiers.py @@ -4,7 +4,7 @@ # See top-level LICENSE file for more information from typing import Any, Dict, Iterable, List, Optional, cast -from urllib.parse import quote +from urllib.parse import quote, unquote from typing_extensions import TypedDict @@ -113,13 +113,13 @@ query_dict[k] = query_params[k] if "origin" in swhid_parsed.metadata: - query_dict["origin_url"] = swhid_parsed.metadata["origin"] + query_dict["origin_url"] = unquote(swhid_parsed.metadata["origin"]) if "anchor" in swhid_parsed.metadata: anchor_swhid_parsed = get_swhid(swhid_parsed.metadata["anchor"]) if "path" in swhid_parsed.metadata and swhid_parsed.metadata["path"] != "/": - query_dict["path"] = swhid_parsed.metadata["path"] + query_dict["path"] = unquote(swhid_parsed.metadata["path"]) if anchor_swhid_parsed: directory = "" if anchor_swhid_parsed.object_type == DIRECTORY: diff --git a/swh/web/templates/includes/show-swhids.html b/swh/web/templates/includes/show-swhids.html --- a/swh/web/templates/includes/show-swhids.html +++ b/swh/web/templates/includes/show-swhids.html @@ -73,6 +73,7 @@ diff --git a/swh/web/tests/browse/views/test_identifiers.py b/swh/web/tests/browse/views/test_identifiers.py --- a/swh/web/tests/browse/views/test_identifiers.py +++ b/swh/web/tests/browse/views/test_identifiers.py @@ -4,10 +4,11 @@ # See top-level LICENSE file for more information import random +from urllib.parse import quote from hypothesis import given -from swh.model.identifiers import CONTENT, REVISION, SNAPSHOT +from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT from swh.web.common.identifiers import gen_swhid from swh.web.common.utils import reverse from swh.web.tests.django_asserts import assert_contains @@ -20,13 +21,11 @@ snapshot, ) -swhid_prefix = "swh:1:" - @given(content()) def test_content_id_browse(client, content): cnt_sha1_git = content["sha1_git"] - swhid = swhid_prefix + "cnt:" + cnt_sha1_git + swhid = gen_swhid(CONTENT, cnt_sha1_git) url = reverse("browse-swhid", url_args={"swhid": swhid}) query_string = "sha1_git:" + cnt_sha1_git @@ -42,7 +41,7 @@ @given(directory()) def test_directory_id_browse(client, directory): - swhid = swhid_prefix + "dir:" + directory + swhid = gen_swhid(DIRECTORY, directory) url = reverse("browse-swhid", url_args={"swhid": swhid}) directory_browse_url = reverse("browse-directory", url_args={"sha1_git": directory}) @@ -55,7 +54,7 @@ @given(revision()) def test_revision_id_browse(client, revision): - swhid = swhid_prefix + "rev:" + revision + swhid = gen_swhid(REVISION, revision) url = reverse("browse-swhid", url_args={"swhid": swhid}) revision_browse_url = reverse("browse-revision", url_args={"sha1_git": revision}) @@ -80,7 +79,7 @@ @given(release()) def test_release_id_browse(client, release): - swhid = swhid_prefix + "rel:" + release + swhid = gen_swhid(RELEASE, release) url = reverse("browse-swhid", url_args={"swhid": swhid}) release_browse_url = reverse("browse-release", url_args={"sha1_git": release}) @@ -105,7 +104,7 @@ @given(snapshot()) def test_snapshot_id_browse(client, snapshot): - swhid = swhid_prefix + "snp:" + snapshot + swhid = gen_swhid(SNAPSHOT, snapshot) url = reverse("browse-swhid", url_args={"swhid": swhid}) snapshot_browse_url = reverse("browse-snapshot", url_args={"snapshot_id": snapshot}) @@ -130,7 +129,7 @@ @given(release()) def test_bad_id_browse(client, release): - swhid = swhid_prefix + "foo:" + release + swhid = f"swh:1:foo:{release}" url = reverse("browse-swhid", url_args={"swhid": swhid}) resp = client.get(url) @@ -140,15 +139,17 @@ @given(content()) def test_content_id_optional_parts_browse(client, content): cnt_sha1_git = content["sha1_git"] - optional_parts = ";lines=4-20;origin=https://github.com/user/repo" - swhid = swhid_prefix + "cnt:" + cnt_sha1_git + optional_parts + origin_url = "https://github.com/user/repo" + swhid = gen_swhid( + CONTENT, cnt_sha1_git, metadata={"lines": "4-20", "origin": origin_url}, + ) url = reverse("browse-swhid", url_args={"swhid": swhid}) query_string = "sha1_git:" + cnt_sha1_git content_browse_url = reverse( "browse-content", url_args={"query_string": query_string}, - query_params={"origin_url": "https://github.com/user/repo"}, + query_params={"origin_url": origin_url}, ) content_browse_url += "#L4-L20" @@ -198,3 +199,14 @@ ) assert_contains(resp, swhid) + + +@given(directory()) +def test_browse_swhid_special_characters_escaping(client, directory): + origin = "http://example.org/?project=abc;" + origin_swhid_escaped = quote(origin, safe="/?:@&") + origin_swhid_url_escaped = quote(origin, safe="/:@;") + swhid = gen_swhid(DIRECTORY, directory, metadata={"origin": origin_swhid_escaped}) + url = reverse("browse-swhid", url_args={"swhid": swhid}) + resp = client.get(url) + assert origin_swhid_url_escaped in resp["location"] diff --git a/swh/web/tests/common/test_identifiers.py b/swh/web/tests/common/test_identifiers.py --- a/swh/web/tests/common/test_identifiers.py +++ b/swh/web/tests/common/test_identifiers.py @@ -4,12 +4,21 @@ # See top-level LICENSE file for more information import random +from urllib.parse import quote from hypothesis import given import pytest from swh.model.hashutil import hash_to_bytes -from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT, SWHID +from swh.model.identifiers import ( + CONTENT, + DIRECTORY, + RELEASE, + REVISION, + SNAPSHOT, + SWHID, + parse_swhid, +) from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.common.exc import BadInputExc from swh.web.common.identifiers import ( @@ -393,19 +402,30 @@ @given(origin(), directory()) -def test_get_swhids_info_path_encoding(archive_data, origin, directory): +def test_get_swhids_info_characters_and_url_escaping(archive_data, origin, directory): snapshot_context = get_snapshot_context(origin_url=origin["url"]) snapshot_context["origin_info"]["url"] = "http://example.org/?project=abc;def%" path = "/foo;/bar%" - swhid = get_swhids_info( + swhid_info = get_swhids_info( [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)], snapshot_context=snapshot_context, extra_context={"path": path}, )[0] - assert swhid["context"]["origin"] == "http://example.org/?project%3Dabc%3Bdef%25" - assert swhid["context"]["path"] == "/foo%3B/bar%25" + # check special characters in SWHID have been escaped + assert ( + swhid_info["context"]["origin"] == "http://example.org/?project%3Dabc%3Bdef%25" + ) + assert swhid_info["context"]["path"] == "/foo%3B/bar%25" + + # check special characters in SWHID URL have been escaped + parsed_url_swhid = parse_swhid(swhid_info["swhid_with_context_url"][1:-1]) + assert ( + parsed_url_swhid.metadata["origin"] + == "http://example.org/%3Fproject%253Dabc%253Bdef%2525" + ) + assert parsed_url_swhid.metadata["path"] == "/foo%253B/bar%2525" @given(origin_with_multiple_visits()) @@ -574,3 +594,14 @@ expected_url += f"-L{lines_number[1]}" assert obj_swhid_resolved["browse_url"] == expected_url + + +@given(directory()) +def test_resolve_swhid_with_escaped_chars(directory): + origin = "http://example.org/?project=abc;" + origin_swhid_escaped = quote(origin, safe="/?:@&") + origin_swhid_url_escaped = quote(origin, safe="/:@;") + swhid = gen_swhid(DIRECTORY, directory, metadata={"origin": origin_swhid_escaped}) + resolved_swhid = resolve_swhid(swhid) + assert resolved_swhid["swhid_parsed"].metadata["origin"] == origin_swhid_escaped + assert origin_swhid_url_escaped in resolved_swhid["browse_url"]