diff --git a/swh/web/ui/converters.py b/swh/web/ui/converters.py --- a/swh/web/ui/converters.py +++ b/swh/web/ui/converters.py @@ -6,6 +6,7 @@ import datetime from swh.core import hashutil +from swh.core.utils import decode_with_escape from swh.web.ui import utils @@ -86,7 +87,14 @@ elif key in hashess: new_dict[key] = utils.fmap(convert_hashes_bytes, value) elif key in bytess: - new_dict[key] = utils.fmap(convert_bytes, value) + try: + new_dict[key] = utils.fmap(convert_bytes, value) + except UnicodeDecodeError: + if 'decoding_failures' not in new_dict: + new_dict['decoding_failures'] = [key] + else: + new_dict['decoding_failures'].append(key) + new_dict[key] = utils.fmap(decode_with_escape, value) elif key in convert: new_dict[key] = convert_fn(value) else: diff --git a/swh/web/ui/templates/content.html b/swh/web/ui/templates/content.html --- a/swh/web/ui/templates/content.html +++ b/swh/web/ui/templates/content.html @@ -24,5 +24,10 @@
{{ content['data'] }}
{% endif %} + {% if 'decoding_failures' in content %} +
+
(some decoding errors)
+
+ {% endif %} {% endif %} {% endblock %} diff --git a/swh/web/ui/templates/directory.html b/swh/web/ui/templates/directory.html --- a/swh/web/ui/templates/directory.html +++ b/swh/web/ui/templates/directory.html @@ -4,7 +4,12 @@ {{ message }} {% if ls is not none %} {% for e in files %} -

{{ e.name }}

+
+

+ {{ e.name }} + {% if 'decoding_failures' in e %}(some decoding errors){% endif %} +

+
{% endfor %} {% endif %} {% endblock %} diff --git a/swh/web/ui/templates/entity.html b/swh/web/ui/templates/entity.html --- a/swh/web/ui/templates/entity.html +++ b/swh/web/ui/templates/entity.html @@ -18,6 +18,11 @@ {% endif %} {% endfor %} + {% if 'decoding_failures' in entity %} +
+
(some decoding errors)
+
+ {% endif %} {% endfor %} {% endif %} {% endblock %} diff --git a/swh/web/ui/templates/origin.html b/swh/web/ui/templates/origin.html --- a/swh/web/ui/templates/origin.html +++ b/swh/web/ui/templates/origin.html @@ -16,6 +16,11 @@ {% endif %} {% endfor %} + {% if 'decoding_failures' in content %} +
+
(some decoding errors)
+
+ {% endif %} {% endif %} diff --git a/swh/web/ui/templates/person.html b/swh/web/ui/templates/person.html --- a/swh/web/ui/templates/person.html +++ b/swh/web/ui/templates/person.html @@ -16,6 +16,11 @@ {% endif %} {% endfor %} + {% if 'decoding_failures' in person %} +
+
(some decoding errors)
+
+ {% endif %} {% endif %} diff --git a/swh/web/ui/templates/release.html b/swh/web/ui/templates/release.html --- a/swh/web/ui/templates/release.html +++ b/swh/web/ui/templates/release.html @@ -9,7 +9,7 @@ {% if release is not none %}
{% for key in release.keys() %} - {% if key not in ['author', 'target_url', 'message', 'target', 'target_type'] and release[key] is not none %} + {% if key not in ['author', 'target_url', 'message', 'target', 'target_type', 'decoding_failures'] and release[key] is not none %}
{{ key }}
{{ release[key] }}
@@ -19,7 +19,10 @@ {% if release['author'] is not none %}
author
-
{{ release['author']['name'] }} - {{ release['author']['email'] }}
+
+ {{ release['author']['name'] }} - {{ release['author']['email'] }} + {% if 'decoding_failures' in release['author'] %}(some decoding errors){% endif %} +
{% endif %}
@@ -29,6 +32,11 @@
{{ release['target_url'] }}
{% endif %} + {% if 'decoding_failures' in release %} +
+
(some decoding errors)
+
+ {% endif %} {% endif %} diff --git a/swh/web/ui/templates/revision-directory.html b/swh/web/ui/templates/revision-directory.html --- a/swh/web/ui/templates/revision-directory.html +++ b/swh/web/ui/templates/revision-directory.html @@ -13,10 +13,20 @@ {% if result['content'] is not none %} {% for e in result['content'] %} {% if e.type == 'dir' %} -

{{ e.name }}

+
+

+ {{ e.name }} + {% if 'decoding_failures' in e %}(some decoding errors){% endif %} +

+
{% else %} -

{{ e.name }}

- {% endif %} +
+

+ {{ e.name }} + {% if 'decoding_failures' in e %}(some decoding errors){% endif %} +

+
+ {% endif %} {% endfor %} {% endif %} {% else %} @@ -39,7 +49,12 @@
data
{{ result['content']['data'] }}
- {% endif %} + {% endif %} + {% if 'decoding_failures' in result %} +
+
(some decoding errors)
+
+ {% endif %} {% endif %} {% endif %} {% endblock %} diff --git a/swh/web/ui/templates/revision-log.html b/swh/web/ui/templates/revision-log.html --- a/swh/web/ui/templates/revision-log.html +++ b/swh/web/ui/templates/revision-log.html @@ -49,7 +49,12 @@ {% if revision['author'] is not none %}
Author
-

{{ revision['author']['name'] }}

+
+

+ {{ revision['author']['name'] }} + {% if 'decoding_failures' in revision['author'] %}(some decoding errors){% endif %} +

+
Date
@@ -60,7 +65,12 @@ {% if revision['committer'] is not none %}
Committer
- +
+

+ {{ revision['committer']['name'] }} + {% if 'decoding_failures' in revision['committer'] %}(some decoding errors){% endif %} +

+
Committer Date
@@ -86,14 +96,13 @@ {% endif %} {% for key in revision.keys() %} - {% if key in ['type', 'synthetic'] and revision[key] is not none %} + {% if key in ['type', 'synthetic'] and key not in ['decoding_failures'] and revision[key] is not none %}
{{ key }}

{{ revision[key] }}

{% endif %} {% endfor %} - {% for key in ['parent_urls', 'children_urls'] %} {% if revision[key] is not none %}
@@ -104,6 +113,12 @@
{% endif %} {% endfor %} + {% if 'decoding_failures' in revision %} +
+
(some decoding errors occurred)
+
+ {% endif %} +

{% endfor %} diff --git a/swh/web/ui/templates/revision.html b/swh/web/ui/templates/revision.html --- a/swh/web/ui/templates/revision.html +++ b/swh/web/ui/templates/revision.html @@ -32,7 +32,12 @@ {% if revision['author'] is not none %}
Author
- +
+

+ {{ revision['author']['name'] }} + {% if 'decoding_failures' in revision['author'] %}(some decoding failed){% endif %} +

+
Date
@@ -43,7 +48,12 @@ {% if revision['committer'] is not none %}
Committer
- +
+

+ {{ revision['committer']['name'] }} + {% if 'decoding_failures' in revision['committer'] %}(some decoding failed){% endif %} +

+
Committer Date
@@ -87,6 +97,11 @@
{% endif %} {% endfor %} + {% if 'decoding_failures' in revision %} +
+
(some decoding failed)
+
+ {% endif %}
{% endif %} diff --git a/swh/web/ui/tests/test_converters.py b/swh/web/ui/tests/test_converters.py --- a/swh/web/ui/tests/test_converters.py +++ b/swh/web/ui/tests/test_converters.py @@ -100,6 +100,33 @@ self.assertEquals(expected_output, actual_output) @istest + def from_swh_edge_cases_convert_invalid_utf8_bytes(self): + some_input = { + 'a': 'something', + 'b': 'someone', + 'c': b'a name \xff', + 'd': b'an email \xff', + } + + expected_output = { + 'a': 'something', + 'b': 'someone', + 'c': 'a name \\xff', + 'd': 'an email \\xff', + 'decoding_failures': ['c', 'd'] + } + + actual_output = converters.from_swh(some_input, + hashess={'a', 'b'}, + bytess={'c', 'd'}) + for v in ['a', 'b', 'c', 'd']: + self.assertEqual(expected_output[v], actual_output[v]) + self.assertEqual(len(expected_output['decoding_failures']), + len(actual_output['decoding_failures'])) + for v in expected_output['decoding_failures']: + self.assertTrue(v in actual_output['decoding_failures']) + + @istest def from_swh_empty(self): # when self.assertEquals({}, converters.from_swh({}))