diff --git a/swh/web/assets/src/bundles/webapp/readme-rendering.js b/swh/web/assets/src/bundles/webapp/readme-rendering.js
--- a/swh/web/assets/src/bundles/webapp/readme-rendering.js
+++ b/swh/web/assets/src/bundles/webapp/readme-rendering.js
@@ -5,23 +5,8 @@
  * See top-level LICENSE file for more information
  */
 
-import DOMPurify from 'dompurify';
-
 import {handleFetchError} from 'utils/functions';
 
-DOMPurify.addHook('uponSanitizeAttribute', function(node, data) {
-  if (node.nodeName === 'IMG' && data.attrName === 'src') {
-    // remove leading slash from image src to fix rendering
-    if (data.attrValue.startsWith('/')) {
-      data.attrValue = data.attrValue.slice(1);
-    }
-  }
-});
-
-export function filterXSS(html) {
-  return DOMPurify.sanitize(html);
-}
-
 export async function renderMarkdown(domElt, markdownDocUrl) {
 
   let showdown = await import(/* webpackChunkName: "showdown" */ 'utils/showdown');
@@ -33,7 +18,7 @@
       .then(response => response.text())
       .then(data => {
         $(domElt).addClass('swh-showdown');
-        $(domElt).html(filterXSS(converter.makeHtml(data)));
+        $(domElt).html(swh.webapp.filterXSS(converter.makeHtml(data)));
       })
       .catch(() => {
         $(domElt).text('Readme bytes are not available');
@@ -50,7 +35,7 @@
       let orgDocument = parser.parse(orgDocData, {toc: false});
       let orgHTMLDocument = orgDocument.convert(org.ConverterHTML, {});
       $(domElt).addClass('swh-org');
-      $(domElt).html(filterXSS(orgHTMLDocument.toString()));
+      $(domElt).html(swh.webapp.filterXSS(orgHTMLDocument.toString()));
       // remove toc and section numbers to get consistent
       // with other readme renderings
       $('.swh-org ul').first().remove();
diff --git a/swh/web/assets/src/bundles/webapp/webapp-utils.js b/swh/web/assets/src/bundles/webapp/webapp-utils.js
--- a/swh/web/assets/src/bundles/webapp/webapp-utils.js
+++ b/swh/web/assets/src/bundles/webapp/webapp-utils.js
@@ -183,3 +183,14 @@
 export function isReCaptchaActivated() {
   return reCaptchaActivated;
 }
+
+// metadata of the swh object currently browsed, empty until explicitly set
+let browsedSwhObjectMetadata = {};
+
+export function setBrowsedSwhObjectMetadata(metadata) {
+  browsedSwhObjectMetadata = metadata;
+}
+
+export function getBrowsedSwhObjectMetadata() {
+  return browsedSwhObjectMetadata;
+}
diff --git a/swh/web/assets/src/bundles/webapp/xss-filtering.js b/swh/web/assets/src/bundles/webapp/xss-filtering.js
new file mode 100644
--- /dev/null
+++ b/swh/web/assets/src/bundles/webapp/xss-filtering.js
@@ -0,0 +1,43 @@
+/**
+ * Copyright (C) 2019  The Software Heritage developers
+ * See the AUTHORS file at the top-level directory of this distribution
+ * License: GNU Affero General Public License version 3, or any later version
+ * See top-level LICENSE file for more information
+ */
+
+import DOMPurify from 'dompurify';
+
+// we register a hook when performing XSS filtering in order to
+// possibly replace a relative image url with the one for getting
+// the image bytes from the archive content
+DOMPurify.addHook('uponSanitizeAttribute', function(node, data) {
+  if (node.nodeName === 'IMG' && data.attrName === 'src') {
+
+    // image url does not need any processing here
+    if (data.attrValue.startsWith('data:image') ||
+        data.attrValue.startsWith('http:') ||
+        data.attrValue.startsWith('https:')) {
+      return;
+    }
+
+    // get currently browsed swh object metadata
+    let swhObjectMetadata = swh.webapp.getBrowsedSwhObjectMetadata();
+
+    // the swh object is provided without any useful context
+    // to get the image checksums from the web api
+    if (!swhObjectMetadata.hasOwnProperty('directory')) {
+      return;
+    }
+
+    // use internal endpoint as image url to possibly get the image data
+    // from the archive content
+    let url = Urls.browse_directory_resolve_content_path(swhObjectMetadata.directory,
+                                                         data.attrValue);
+    data.attrValue = url;
+  }
+});
+
+// sanitize an HTML string with DOMPurify, the hook above rewrites image urls
+export function filterXSS(html) {
+  return DOMPurify.sanitize(html);
+}
diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py
--- a/swh/web/browse/views/directory.py
+++ b/swh/web/browse/views/directory.py
@@ -3,7 +3,9 @@
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import os
 
+from django.http import HttpResponse
 from django.shortcuts import render, redirect
 from django.template.defaultfilters import filesizeformat
 
@@ -35,14 +37,6 @@
     try:
         if path:
             dir_info = service.lookup_directory_with_path(sha1_git, path)
-            # some readme files can reference assets reachable from the
-            # browsed directory, handle that special case in order to
-            # correctly displayed them
-            if dir_info and dir_info['type'] == 'file':
-                file_raw_url = reverse(
-                    'browse-content-raw',
-                    url_args={'query_string': dir_info['checksums']['sha1']})
-                return redirect(file_raw_url)
             sha1_git = dir_info['target']
 
         dirs, files = get_directory_entries(sha1_git)
@@ -159,3 +153,28 @@
                    'vault_cooking': vault_cooking,
                    'show_actions_menu': True,
                    'swh_ids': swh_ids})
+
+
+@browse_route(r'directory/resolve/content-path/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/',  # noqa
+              view_name='browse-directory-resolve-content-path',
+              checksum_args=['sha1_git'])
+def _directory_resolve_content_path(request, sha1_git, path):
+    """
+    Internal endpoint redirecting to data url for a specific file path
+    relative to a root directory.
+
+    A 404 response is returned when the path escapes the root directory,
+    does not target a file or cannot be resolved.
+    """
+    try:
+        path = os.path.normpath(path)
+        if path != '..' and not path.startswith('../'):
+            dir_info = service.lookup_directory_with_path(sha1_git, path)
+            if dir_info['type'] == 'file':
+                sha1 = dir_info['checksums']['sha1']
+                data_url = reverse('browse-content-raw',
+                                   url_args={'query_string': sha1})
+                return redirect(data_url)
+    except Exception:
+        pass
+    return HttpResponse(status=404)
diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py
--- a/swh/web/browse/views/revision.py
+++ b/swh/web/browse/views/revision.py
@@ -8,7 +8,7 @@
 import textwrap
 
 from django.http import HttpResponse
-from django.shortcuts import render, redirect
+from django.shortcuts import render
 from django.template.defaultfilters import filesizeformat
 from django.utils.html import escape
 from django.utils.safestring import mark_safe
@@ -266,18 +266,6 @@
     """
     try:
         revision = service.lookup_revision(sha1_git)
-        # some readme files can reference assets reachable from the
-        # browsed directory, handle that special case in order to
-        # correctly displayed them
-        if extra_path:
-            dir_info = \
-                service.lookup_directory_with_path(revision['directory'],
-                                                   extra_path)
-            if dir_info and dir_info['type'] == 'file':
-                file_raw_url = reverse(
-                    'browse-content-raw',
-                    url_args={'query_string': dir_info['checksums']['sha1']})
-                return redirect(file_raw_url)
         origin_info = None
         snapshot_context = None
         origin_type = request.GET.get('origin_type', None)
diff --git a/swh/web/browse/views/utils/snapshot_context.py b/swh/web/browse/views/utils/snapshot_context.py
--- a/swh/web/browse/views/utils/snapshot_context.py
+++ b/swh/web/browse/views/utils/snapshot_context.py
@@ -8,7 +8,7 @@
 # Its purpose is to factorize code for the views reachable from the
 # /origin/.* and /snapshot/.* endpoints.
 
-from django.shortcuts import render, redirect
+from django.shortcuts import render
 from django.template.defaultfilters import filesizeformat
 
 from swh.model.identifiers import snapshot_identifier
@@ -241,14 +241,6 @@
     sha1_git = root_sha1_git
     if root_sha1_git and path:
         dir_info = service.lookup_directory_with_path(root_sha1_git, path)
-        # some readme files can reference assets reachable from the
-        # browsed directory, handle that special case in order to
-        # correctly displayed them
-        if dir_info and dir_info['type'] == 'file':
-            file_raw_url = reverse(
-                'browse-content-raw',
-                url_args={'query_string': dir_info['checksums']['sha1']})
-            return redirect(file_raw_url)
         sha1_git = dir_info['target']
 
     dirs = []
diff --git a/swh/web/templates/includes/show-metadata.html b/swh/web/templates/includes/show-metadata.html
--- a/swh/web/templates/includes/show-metadata.html
+++ b/swh/web/templates/includes/show-metadata.html
@@ -29,4 +29,8 @@
-
\ No newline at end of file
+
+
+