Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163742
D2345.id8077.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D2345.id8077.diff
View Options
diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py
--- a/swh/web/browse/utils.py
+++ b/swh/web/browse/utils.py
@@ -129,19 +129,19 @@
elif mimetype.startswith('application/octet-stream'):
# file may detect a text content as binary
# so try to decode it for display
- encodings = ['us-ascii']
+ encodings = ['us-ascii', 'utf-8']
encodings += ['iso-8859-%s' % i for i in range(1, 17)]
- for encoding in encodings:
+ for enc in encodings:
try:
- content_data = content_data.decode(encoding)\
- .encode('utf-8')
+ content_data = content_data.decode(enc).encode('utf-8')
except Exception:
pass
else:
# ensure display in content view
+ encoding = enc
mimetype = 'text/plain'
break
- return mimetype, content_data
+ return mimetype, encoding, content_data
def request_content(query_string, max_size=content_display_max_size,
@@ -215,7 +215,7 @@
get_mimetype_and_encoding_for_content(content_data['raw_data']) # noqa
if re_encode:
- mimetype, raw_data = _re_encode_content(
+ mimetype, encoding, raw_data = _re_encode_content(
mimetype, encoding, content_data['raw_data'])
content_data['raw_data'] = raw_data
diff --git a/swh/web/templates/includes/content-display.html b/swh/web/templates/includes/content-display.html
--- a/swh/web/templates/includes/content-display.html
+++ b/swh/web/templates/includes/content-display.html
@@ -40,7 +40,8 @@
<canvas id="pdf-canvas"></canvas>
</div>
{% elif content %}
- Content with mime type {{ swh_object_metadata.mimetype }} can not be displayed.
+ Content with mime type {{ swh_object_metadata.mimetype }} and encoding
+ {{ swh_object_metadata.encoding }} cannot be displayed.
{% else %}
{% include "includes/http-error.html" %}
{% endif %}
diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py
--- a/swh/web/tests/browse/views/test_content.py
+++ b/swh/web/tests/browse/views/test_content.py
@@ -19,7 +19,8 @@
)
from swh.web.tests.strategies import (
content, content_text_non_utf8, content_text_no_highlight,
- content_image_type, content_text, invalid_sha1, unknown_content
+ content_image_type, content_text, invalid_sha1, unknown_content,
+ content_utf8_detected_as_binary
)
@@ -356,14 +357,26 @@
assert resp['location'] == redirect_url
+@given(content_utf8_detected_as_binary())
+def test_content_utf8_detected_as_binary_display(client, archive_data,
+ content):
+ url = reverse('browse-content',
+ url_args={'query_string': content['sha1']})
+ resp = client.get(url)
+
+ content_display = _process_content_for_display(archive_data, content)
+
+ assert_contains(resp, escape(content_display['content_data']))
+
+
def _process_content_for_display(archive_data, content):
content_data = archive_data.content_get(content['sha1'])
mime_type, encoding = get_mimetype_and_encoding_for_content(
content_data['data'])
- mime_type, content_data = _re_encode_content(mime_type, encoding,
- content_data['data'])
+ mime_type, encoding, content_data = _re_encode_content(
+ mime_type, encoding, content_data['data'])
return prepare_content_for_display(content_data, mime_type,
content['path'])
diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py
--- a/swh/web/tests/strategies.py
+++ b/swh/web/tests/strategies.py
@@ -123,6 +123,24 @@
return content().filter(lambda c: c['mimetype'].startswith('image/'))
+def content_utf8_detected_as_binary():
+ """
+ Hypothesis strategy returning random textual contents detected as binary
+ by libmagic while they are valid UTF-8 encoded files.
+ """
+ def utf8_binary_detected(content):
+ if content['encoding'] != 'binary':
+ return False
+ try:
+ content['data'].decode('utf-8')
+ except Exception:
+ return False
+ else:
+ return True
+
+ return content().filter(utf8_binary_detected)
+
+
@composite
def new_content(draw):
blake2s256_hex = draw(sha256())
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 2:32 PM (7 h, 45 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217574
Attached To
D2345: browse: Ensure display of valid utf-8 contents detected as binary
Event Timeline
Log In to Comment