Changeset View
Changeset View
Standalone View
Standalone View
swh/web/browse/utils.py
Show All 9 Lines | |||||
import textwrap | import textwrap | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from threading import Lock | from threading import Lock | ||||
from django.core.cache import cache | from django.core.cache import cache | ||||
from django.utils.safestring import mark_safe | from django.utils.safestring import mark_safe | ||||
from django.utils.html import escape | from django.utils.html import escape | ||||
import sentry_sdk | |||||
from swh.model.identifiers import persistent_identifier | from swh.model.identifiers import persistent_identifier | ||||
from swh.web.common import highlightjs, service | from swh.web.common import highlightjs, service | ||||
from swh.web.common.exc import NotFoundExc, http_status_code_message | from swh.web.common.exc import NotFoundExc, http_status_code_message | ||||
from swh.web.common.origin_visits import get_origin_visit | from swh.web.common.origin_visits import get_origin_visit | ||||
from swh.web.common.utils import ( | from swh.web.common.utils import ( | ||||
reverse, format_utc_iso_date, get_swh_persistent_id, | reverse, format_utc_iso_date, get_swh_persistent_id, | ||||
swh_object_icons | swh_object_icons | ||||
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines | def _re_encode_content(mimetype, encoding, content_data): | ||||
elif mimetype.startswith('application/octet-stream'): | elif mimetype.startswith('application/octet-stream'): | ||||
# file may detect a text content as binary | # file may detect a text content as binary | ||||
# so try to decode it for display | # so try to decode it for display | ||||
encodings = ['us-ascii', 'utf-8'] | encodings = ['us-ascii', 'utf-8'] | ||||
encodings += ['iso-8859-%s' % i for i in range(1, 17)] | encodings += ['iso-8859-%s' % i for i in range(1, 17)] | ||||
for enc in encodings: | for enc in encodings: | ||||
try: | try: | ||||
content_data = content_data.decode(enc).encode('utf-8') | content_data = content_data.decode(enc).encode('utf-8') | ||||
except Exception: | except Exception as exc: | ||||
pass | sentry_sdk.capture_exception(exc) | ||||
else: | else: | ||||
# ensure display in content view | # ensure display in content view | ||||
encoding = enc | encoding = enc | ||||
mimetype = 'text/plain' | mimetype = 'text/plain' | ||||
break | break | ||||
return mimetype, encoding, content_data | return mimetype, encoding, content_data | ||||
Show All 25 Lines | def request_content(query_string, max_size=content_display_max_size, | ||||
language = None | language = None | ||||
license = None | license = None | ||||
# requests to the indexer db may fail so properly handle | # requests to the indexer db may fail so properly handle | ||||
# those cases in order to avoid content display errors | # those cases in order to avoid content display errors | ||||
try: | try: | ||||
filetype = service.lookup_content_filetype(query_string) | filetype = service.lookup_content_filetype(query_string) | ||||
language = service.lookup_content_language(query_string) | language = service.lookup_content_language(query_string) | ||||
license = service.lookup_content_license(query_string) | license = service.lookup_content_license(query_string) | ||||
except Exception: | except Exception as exc: | ||||
pass | sentry_sdk.capture_exception(exc) | ||||
mimetype = 'unknown' | mimetype = 'unknown' | ||||
encoding = 'unknown' | encoding = 'unknown' | ||||
if filetype: | if filetype: | ||||
mimetype = filetype['mimetype'] | mimetype = filetype['mimetype'] | ||||
encoding = filetype['encoding'] | encoding = filetype['encoding'] | ||||
# workaround when encountering corrupted data due to implicit | # workaround when encountering corrupted data due to implicit | ||||
# conversion from bytea to text in the indexer db (see T818) | # conversion from bytea to text in the indexer db (see T818) | ||||
# TODO: Remove that code when all data have been correctly converted | # TODO: Remove that code when all data have been correctly converted | ||||
if mimetype.startswith('\\'): | if mimetype.startswith('\\'): | ||||
filetype = None | filetype = None | ||||
content_data['error_code'] = 200 | content_data['error_code'] = 200 | ||||
content_data['error_message'] = '' | content_data['error_message'] = '' | ||||
content_data['error_description'] = '' | content_data['error_description'] = '' | ||||
if not max_size or content_data['length'] < max_size: | if not max_size or content_data['length'] < max_size: | ||||
try: | try: | ||||
content_raw = service.lookup_content_raw(query_string) | content_raw = service.lookup_content_raw(query_string) | ||||
except Exception as e: | except Exception as exc: | ||||
if raise_if_unavailable: | if raise_if_unavailable: | ||||
raise e | raise exc | ||||
else: | else: | ||||
sentry_sdk.capture_exception(exc) | |||||
content_data['raw_data'] = None | content_data['raw_data'] = None | ||||
content_data['error_code'] = 404 | content_data['error_code'] = 404 | ||||
content_data['error_description'] = \ | content_data['error_description'] = \ | ||||
'The bytes of the content are currently not available in the archive.' # noqa | 'The bytes of the content are currently not available in the archive.' # noqa | ||||
content_data['error_message'] = \ | content_data['error_message'] = \ | ||||
http_status_code_message[content_data['error_code']] | http_status_code_message[content_data['error_code']] | ||||
else: | else: | ||||
content_data['raw_data'] = content_raw['data'] | content_data['raw_data'] = content_raw['data'] | ||||
▲ Show 20 Lines • Show All 831 Lines • ▼ Show 20 Lines | if readme_name and readme_name.endswith('.rst'): | ||||
if cache_entry: | if cache_entry: | ||||
readme_html = cache_entry | readme_html = cache_entry | ||||
else: | else: | ||||
try: | try: | ||||
rst_doc = request_content(readme_sha1) | rst_doc = request_content(readme_sha1) | ||||
readme_html = pypandoc.convert_text(rst_doc['raw_data'], | readme_html = pypandoc.convert_text(rst_doc['raw_data'], | ||||
'html', format='rst') | 'html', format='rst') | ||||
cache.set(cache_entry_id, readme_html) | cache.set(cache_entry_id, readme_html) | ||||
except Exception: | except Exception as exc: | ||||
sentry_sdk.capture_exception(exc) | |||||
readme_html = 'Readme bytes are not available' | readme_html = 'Readme bytes are not available' | ||||
return readme_name, readme_url, readme_html | return readme_name, readme_url, readme_html | ||||
def get_swh_persistent_ids(swh_objects, snapshot_context=None): | def get_swh_persistent_ids(swh_objects, snapshot_context=None): | ||||
""" | """ | ||||
Returns a list of dict containing info related to persistent | Returns a list of dict containing info related to persistent | ||||
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines |