diff --git a/swh/web/ui/converters.py b/swh/web/ui/converters.py
--- a/swh/web/ui/converters.py
+++ b/swh/web/ui/converters.py
@@ -6,6 +6,7 @@
import datetime
from swh.core import hashutil
+from swh.core.utils import decode_with_escape
from swh.web.ui import utils
@@ -86,7 +87,14 @@
elif key in hashess:
new_dict[key] = utils.fmap(convert_hashes_bytes, value)
elif key in bytess:
- new_dict[key] = utils.fmap(convert_bytes, value)
+ try:
+ new_dict[key] = utils.fmap(convert_bytes, value)
+ except UnicodeDecodeError:
+ if 'decoding_failures' not in new_dict:
+ new_dict['decoding_failures'] = [key]
+ else:
+ new_dict['decoding_failures'].append(key)
+ new_dict[key] = utils.fmap(decode_with_escape, value)
elif key in convert:
new_dict[key] = convert_fn(value)
else:
diff --git a/swh/web/ui/templates/content.html b/swh/web/ui/templates/content.html
--- a/swh/web/ui/templates/content.html
+++ b/swh/web/ui/templates/content.html
@@ -24,5 +24,10 @@
{% for key in release.keys() %}
- {% if key not in ['author', 'target_url', 'message', 'target', 'target_type'] and release[key] is not none %}
+ {% if key not in ['author', 'target_url', 'message', 'target', 'target_type', 'decoding_failures'] and release[key] is not none %}
{{ key }}
{{ release[key] }}
@@ -19,7 +19,10 @@
{% if release['author'] is not none %}
author
-
{{ release['author']['name'] }} - {{ release['author']['email'] }}
+
+ {{ release['author']['name'] }} - {{ release['author']['email'] }}
+ {% if 'decoding_failures' in release['author'] %}(some decoding errors){% endif %}
+
{% endif %}
@@ -29,6 +32,11 @@
{% endif %}
+ {% if 'decoding_failures' in release %}
+
Date
@@ -60,7 +65,12 @@
{% if revision['committer'] is not none %}
Committer Date
@@ -86,14 +96,13 @@
{% endif %}
{% for key in revision.keys() %}
{% if key in ['type', 'synthetic'] and revision[key] is not none %}
{% endif %}
{% endfor %}
-
{% for key in ['parent_urls', 'children_urls'] %}
{% if revision[key] is not none %}
@@ -104,6 +113,12 @@
{% endif %}
{% endfor %}
+ {% if 'decoding_failures' in revision %}
+
+
(some decoding errors occurred)
+
+ {% endif %}
+
{% endfor %}
diff --git a/swh/web/ui/templates/revision.html b/swh/web/ui/templates/revision.html
--- a/swh/web/ui/templates/revision.html
+++ b/swh/web/ui/templates/revision.html
@@ -32,7 +32,12 @@
{% if revision['author'] is not none %}
Date
@@ -43,7 +48,12 @@
{% if revision['committer'] is not none %}
Committer Date
@@ -87,6 +97,11 @@
{% endif %}
{% endfor %}
+ {% if 'decoding_failures' in revision %}
+
+
+ (some decoding errors occurred)
+
+ {% endif %}
{% endif %}
diff --git a/swh/web/ui/tests/test_converters.py b/swh/web/ui/tests/test_converters.py
--- a/swh/web/ui/tests/test_converters.py
+++ b/swh/web/ui/tests/test_converters.py
@@ -100,6 +100,33 @@
self.assertEquals(expected_output, actual_output)
@istest
+    def from_swh_edge_cases_convert_invalid_utf8_bytes(self):
+        """Invalid UTF-8 under `bytess` keys is escaped and flagged."""
+        some_input = {
+            'a': 'something',
+            'b': 'someone',
+            'c': b'a name \xff',
+            'd': b'an email \xff',
+        }
+
+        expected_output = {
+            'a': 'something',
+            'b': 'someone',
+            'c': 'a name \\xff',
+            'd': 'an email \\xff',
+            'decoding_failures': ['c', 'd']
+        }
+
+        actual_output = converters.from_swh(some_input,
+                                            hashess={'a', 'b'},
+                                            bytess={'c', 'd'})
+        for v in ['a', 'b', 'c', 'd']:
+            self.assertEqual(expected_output[v], actual_output[v])
+        # failure keys may come back in any order, so compare unordered
+        self.assertCountEqual(expected_output['decoding_failures'],
+                              actual_output['decoding_failures'])
+
+ @istest
def from_swh_empty(self):
# when
self.assertEquals({}, converters.from_swh({}))