Differential D2423 Diff 8583 swh/web/browse/utils.py

Changeset View

Standalone View

swh/web/browse/utils.py

Show All 9 Lines
import textwrap		import textwrap

from collections import defaultdict		from collections import defaultdict
from threading import Lock		from threading import Lock

from django.core.cache import cache		from django.core.cache import cache
from django.utils.safestring import mark_safe		from django.utils.safestring import mark_safe
from django.utils.html import escape		from django.utils.html import escape
		import sentry_sdk

from swh.model.identifiers import persistent_identifier		from swh.model.identifiers import persistent_identifier
from swh.web.common import highlightjs, service		from swh.web.common import highlightjs, service
from swh.web.common.exc import NotFoundExc, http_status_code_message		from swh.web.common.exc import NotFoundExc, http_status_code_message
from swh.web.common.origin_visits import get_origin_visit		from swh.web.common.origin_visits import get_origin_visit
from swh.web.common.utils import (		from swh.web.common.utils import (
reverse, format_utc_iso_date, get_swh_persistent_id,		reverse, format_utc_iso_date, get_swh_persistent_id,
swh_object_icons		swh_object_icons
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines	def _re_encode_content(mimetype, encoding, content_data):
elif mimetype.startswith('application/octet-stream'):		elif mimetype.startswith('application/octet-stream'):
# file may detect a text content as binary		# file may detect a text content as binary
# so try to decode it for display		# so try to decode it for display
encodings = ['us-ascii', 'utf-8']		encodings = ['us-ascii', 'utf-8']
encodings += ['iso-8859-%s' % i for i in range(1, 17)]		encodings += ['iso-8859-%s' % i for i in range(1, 17)]
for enc in encodings:		for enc in encodings:
try:		try:
content_data = content_data.decode(enc).encode('utf-8')		content_data = content_data.decode(enc).encode('utf-8')
except Exception:		except Exception as exc:
pass		sentry_sdk.capture_exception(exc)
else:		else:
# ensure display in content view		# ensure display in content view
encoding = enc		encoding = enc
mimetype = 'text/plain'		mimetype = 'text/plain'
break		break
return mimetype, encoding, content_data		return mimetype, encoding, content_data


Show All 25 Lines	def request_content(query_string, max_size=content_display_max_size,
language = None		language = None
license = None		license = None
# requests to the indexer db may fail so properly handle		# requests to the indexer db may fail so properly handle
# those cases in order to avoid content display errors		# those cases in order to avoid content display errors
try:		try:
filetype = service.lookup_content_filetype(query_string)		filetype = service.lookup_content_filetype(query_string)
language = service.lookup_content_language(query_string)		language = service.lookup_content_language(query_string)
license = service.lookup_content_license(query_string)		license = service.lookup_content_license(query_string)
except Exception:		except Exception as exc:
pass		sentry_sdk.capture_exception(exc)
mimetype = 'unknown'		mimetype = 'unknown'
encoding = 'unknown'		encoding = 'unknown'
if filetype:		if filetype:
mimetype = filetype['mimetype']		mimetype = filetype['mimetype']
encoding = filetype['encoding']		encoding = filetype['encoding']
# workaround when encountering corrupted data due to implicit		# workaround when encountering corrupted data due to implicit
# conversion from bytea to text in the indexer db (see T818)		# conversion from bytea to text in the indexer db (see T818)
# TODO: Remove that code when all data have been correctly converted		# TODO: Remove that code when all data have been correctly converted
if mimetype.startswith('\\'):		if mimetype.startswith('\\'):
filetype = None		filetype = None

content_data['error_code'] = 200		content_data['error_code'] = 200
content_data['error_message'] = ''		content_data['error_message'] = ''
content_data['error_description'] = ''		content_data['error_description'] = ''

if not max_size or content_data['length'] < max_size:		if not max_size or content_data['length'] < max_size:
try:		try:
content_raw = service.lookup_content_raw(query_string)		content_raw = service.lookup_content_raw(query_string)
except Exception as e:		except Exception as exc:
if raise_if_unavailable:		if raise_if_unavailable:
raise e		raise exc
else:		else:
		sentry_sdk.capture_exception(exc)
content_data['raw_data'] = None		content_data['raw_data'] = None
content_data['error_code'] = 404		content_data['error_code'] = 404
content_data['error_description'] = \		content_data['error_description'] = \
'The bytes of the content are currently not available in the archive.' # noqa		'The bytes of the content are currently not available in the archive.' # noqa
content_data['error_message'] = \		content_data['error_message'] = \
http_status_code_message[content_data['error_code']]		http_status_code_message[content_data['error_code']]
else:		else:
content_data['raw_data'] = content_raw['data']		content_data['raw_data'] = content_raw['data']
▲ Show 20 Lines • Show All 831 Lines • ▼ Show 20 Lines	if readme_name and readme_name.endswith('.rst'):
if cache_entry:		if cache_entry:
readme_html = cache_entry		readme_html = cache_entry
else:		else:
try:		try:
rst_doc = request_content(readme_sha1)		rst_doc = request_content(readme_sha1)
readme_html = pypandoc.convert_text(rst_doc['raw_data'],		readme_html = pypandoc.convert_text(rst_doc['raw_data'],
'html', format='rst')		'html', format='rst')
cache.set(cache_entry_id, readme_html)		cache.set(cache_entry_id, readme_html)
except Exception:		except Exception as exc:
		sentry_sdk.capture_exception(exc)
readme_html = 'Readme bytes are not available'		readme_html = 'Readme bytes are not available'

return readme_name, readme_url, readme_html		return readme_name, readme_url, readme_html


def get_swh_persistent_ids(swh_objects, snapshot_context=None):		def get_swh_persistent_ids(swh_objects, snapshot_context=None):
"""		"""
Returns a list of dict containing info related to persistent		Returns a list of dict containing info related to persistent
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines