Page Menu | Home | Software Heritage

D2345.id8077.diff
No One | Temporary

D2345.id8077.diff

diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py
--- a/swh/web/browse/utils.py
+++ b/swh/web/browse/utils.py
@@ -129,19 +129,19 @@
elif mimetype.startswith('application/octet-stream'):
# file may detect a text content as binary
# so try to decode it for display
- encodings = ['us-ascii']
+ encodings = ['us-ascii', 'utf-8']
encodings += ['iso-8859-%s' % i for i in range(1, 17)]
- for encoding in encodings:
+ for enc in encodings:
try:
- content_data = content_data.decode(encoding)\
- .encode('utf-8')
+ content_data = content_data.decode(enc).encode('utf-8')
except Exception:
pass
else:
# ensure display in content view
+ encoding = enc
mimetype = 'text/plain'
break
- return mimetype, content_data
+ return mimetype, encoding, content_data
def request_content(query_string, max_size=content_display_max_size,
@@ -215,7 +215,7 @@
get_mimetype_and_encoding_for_content(content_data['raw_data']) # noqa
if re_encode:
- mimetype, raw_data = _re_encode_content(
+ mimetype, encoding, raw_data = _re_encode_content(
mimetype, encoding, content_data['raw_data'])
content_data['raw_data'] = raw_data
diff --git a/swh/web/templates/includes/content-display.html b/swh/web/templates/includes/content-display.html
--- a/swh/web/templates/includes/content-display.html
+++ b/swh/web/templates/includes/content-display.html
@@ -40,7 +40,8 @@
<canvas id="pdf-canvas"></canvas>
</div>
{% elif content %}
- Content with mime type {{ swh_object_metadata.mimetype }} can not be displayed.
+ Content with mime type {{ swh_object_metadata.mimetype }} and encoding
+ {{ swh_object_metadata.encoding }} cannot be displayed.
{% else %}
{% include "includes/http-error.html" %}
{% endif %}
diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py
--- a/swh/web/tests/browse/views/test_content.py
+++ b/swh/web/tests/browse/views/test_content.py
@@ -19,7 +19,8 @@
)
from swh.web.tests.strategies import (
content, content_text_non_utf8, content_text_no_highlight,
- content_image_type, content_text, invalid_sha1, unknown_content
+ content_image_type, content_text, invalid_sha1, unknown_content,
+ content_utf8_detected_as_binary
)
@@ -356,14 +357,26 @@
assert resp['location'] == redirect_url
+@given(content_utf8_detected_as_binary())
+def test_content_utf8_detected_as_binary_display(client, archive_data,
+ content):
+ url = reverse('browse-content',
+ url_args={'query_string': content['sha1']})
+ resp = client.get(url)
+
+ content_display = _process_content_for_display(archive_data, content)
+
+ assert_contains(resp, escape(content_display['content_data']))
+
+
def _process_content_for_display(archive_data, content):
content_data = archive_data.content_get(content['sha1'])
mime_type, encoding = get_mimetype_and_encoding_for_content(
content_data['data'])
- mime_type, content_data = _re_encode_content(mime_type, encoding,
- content_data['data'])
+ mime_type, encoding, content_data = _re_encode_content(
+ mime_type, encoding, content_data['data'])
return prepare_content_for_display(content_data, mime_type,
content['path'])
diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py
--- a/swh/web/tests/strategies.py
+++ b/swh/web/tests/strategies.py
@@ -123,6 +123,24 @@
return content().filter(lambda c: c['mimetype'].startswith('image/'))
+def content_utf8_detected_as_binary():
+ """
+ Hypothesis strategy returning random textual contents detected as binary
+ by libmagic while they are valid UTF-8 encoded files.
+ """
+ def utf8_binary_detected(content):
+ if content['encoding'] != 'binary':
+ return False
+ try:
+ content['data'].decode('utf-8')
+ except Exception:
+ return False
+ else:
+ return True
+
+ return content().filter(utf8_binary_detected)
+
+
@composite
def new_content(draw):
blake2s256_hex = draw(sha256())

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 2:32 PM (7 h, 45 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217574

Event Timeline