diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -3,3 +3,4 @@ swh.vault >= 0.0.23 swh.indexer >= 0.0.120 swh.scheduler >= 0.0.31 +swh.core >= 0.0.81 diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -4,5 +4,6 @@ pytest-mock django-stubs < 1.3.0 requests-mock -swh.core[http] >= 0.0.61 +swh.core[http] >= 0.0.81 swh.loader.git >= 0.0.47 +decorator # dependency of swh.core[http] diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ requests python-memcached pybadges +sentry-sdk # Doc dependencies sphinx diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py --- a/swh/web/admin/deposit.py +++ b/swh/web/admin/deposit.py @@ -14,6 +14,7 @@ from django.shortcuts import render from requests.auth import HTTPBasicAuth +import sentry_sdk from swh.web.admin.adminurls import admin_route from swh.web.config import get_config @@ -85,7 +86,8 @@ 'swh_id_context': d['swh_id_context'] } for d in data] - except Exception: + except Exception as exc: + sentry_sdk.capture_exception(exc) table_data['error'] = ('An error occurred while retrieving ' 'the list of deposits !') diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -7,12 +7,13 @@ import docutils.parsers.rst import docutils.utils import functools +from functools import wraps import os import re import textwrap -from functools import wraps from rest_framework.decorators import api_view +import sentry_sdk from swh.web.common.utils import parse_rst from swh.web.api.apiurls import APIUrls @@ -313,6 +314,7 @@ try: response = f(request, **kwargs) except Exception as exc: + sentry_sdk.capture_exception(exc) if request.accepted_media_type == 'text/html' and \ need_params and not request.query_params: response = None diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py 
--- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -15,6 +15,7 @@ from django.core.cache import cache from django.utils.safestring import mark_safe from django.utils.html import escape +import sentry_sdk from swh.model.identifiers import persistent_identifier from swh.web.common import highlightjs, service @@ -134,8 +135,8 @@ for enc in encodings: try: content_data = content_data.decode(enc).encode('utf-8') - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) else: # ensure display in content view encoding = enc @@ -177,8 +178,8 @@ filetype = service.lookup_content_filetype(query_string) language = service.lookup_content_language(query_string) license = service.lookup_content_license(query_string) - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) mimetype = 'unknown' encoding = 'unknown' if filetype: @@ -197,10 +198,11 @@ if not max_size or content_data['length'] < max_size: try: content_raw = service.lookup_content_raw(query_string) - except Exception as e: + except Exception as exc: if raise_if_unavailable: - raise e + raise exc else: + sentry_sdk.capture_exception(exc) content_data['raw_data'] = None content_data['error_code'] = 404 content_data['error_description'] = \ @@ -1048,7 +1050,8 @@ readme_html = pypandoc.convert_text(rst_doc['raw_data'], 'html', format='rst') cache.set(cache_entry_id, readme_html) - except Exception: + except Exception as exc: + sentry_sdk.capture_exception(exc) readme_html = 'Readme bytes are not available' return readme_name, readme_url, readme_html diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -11,6 +11,7 @@ from django.http import HttpResponse from django.shortcuts import render from django.template.defaultfilters import filesizeformat +import sentry_sdk from swh.model.hashutil import hash_to_hex @@ -155,8 +156,9 @@ diff_lines = 
difflib.unified_diff(content_from_lines, content_to_lines) diff_str = ''.join(list(diff_lines)[2:]) - except Exception as e: - diff_str = str(e) + except Exception as exc: + sentry_sdk.capture_exception(exc) + diff_str = str(exc) diff_data['diff_str'] = diff_str diff_data['language'] = language @@ -188,7 +190,7 @@ if origin_url: try: snapshot_context = get_snapshot_context(origin_url=origin_url) - except Exception: + except NotFoundExc: raw_cnt_url = reverse('browse-content', url_args={'query_string': query_string}) error_message = \ diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -8,6 +8,7 @@ from django.http import HttpResponse from django.shortcuts import render, redirect from django.template.defaultfilters import filesizeformat +import sentry_sdk from swh.web.common import service from swh.web.common.utils import ( @@ -48,7 +49,7 @@ if origin_url: try: snapshot_context = get_snapshot_context(origin_url=origin_url) - except Exception: + except NotFoundExc: raw_dir_url = reverse('browse-directory', url_args={'sha1_git': sha1_git}) error_message = \ @@ -171,6 +172,6 @@ data_url = reverse('browse-content-raw', url_args={'query_string': sha1}) return redirect(data_url) - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) return HttpResponse(status=404) diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py --- a/swh/web/browse/views/release.py +++ b/swh/web/browse/views/release.py @@ -4,6 +4,7 @@ # See top-level LICENSE file for more information from django.shortcuts import render +import sentry_sdk from swh.web.common import service from swh.web.common.utils import ( @@ -42,7 +43,7 @@ try: snapshot_context = get_snapshot_context( snapshot_id, origin_url, timestamp, visit_id) - except Exception: + except NotFoundExc: raw_rel_url = reverse('browse-release', url_args={'sha1_git': sha1_git}) 
error_message = \ @@ -106,8 +107,8 @@ 'revision_context': True, 'revision_id': release['target'] } - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) elif release['target_type'] == 'directory': target_link = gen_directory_link(release['target'], snapshot_context=snapshot_context, @@ -120,8 +121,8 @@ 'revision_context': False, 'revision_id': None } - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) elif release['target_type'] == 'content': target_link = gen_content_link(release['target'], snapshot_context=snapshot_context, diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -282,7 +282,7 @@ snapshot_context = get_snapshot_context( origin_url=origin_url, timestamp=timestamp, visit_id=visit_id) - except Exception: + except NotFoundExc: raw_rev_url = reverse('browse-revision', url_args={'sha1_git': sha1_git}) error_message = \ diff --git a/swh/web/browse/views/utils/snapshot_context.py b/swh/web/browse/views/utils/snapshot_context.py --- a/swh/web/browse/views/utils/snapshot_context.py +++ b/swh/web/browse/views/utils/snapshot_context.py @@ -11,6 +11,7 @@ from django.shortcuts import render from django.template.defaultfilters import filesizeformat from django.utils.html import escape +import sentry_sdk from swh.model.identifiers import snapshot_identifier @@ -182,7 +183,8 @@ revision_id = release['target'] release_id = release['id'] query_params['release'] = release_name - except Exception: + except Exception as exc: + sentry_sdk.capture_exception(exc) _branch_not_found('release', release_name, snapshot_id, snapshot_sizes, origin_info, timestamp, visit_id) elif snapshot_total_size: @@ -195,7 +197,8 @@ branch_name = branch['name'] revision_id = branch['revision'] root_sha1_git = branch['directory'] - except Exception: + except Exception as exc: + sentry_sdk.capture_exception(exc) 
_branch_not_found('branch', branch_name, snapshot_id, snapshot_sizes, origin_info, timestamp, visit_id) diff --git a/swh/web/common/exc.py b/swh/web/common/exc.py --- a/swh/web/common/exc.py +++ b/swh/web/common/exc.py @@ -9,6 +9,7 @@ from django.shortcuts import render from django.utils.safestring import mark_safe from django.utils.html import escape +import sentry_sdk from swh.web.config import get_config @@ -106,6 +107,7 @@ Function used to generate an error page when an exception was raised inside a swh-web browse view. """ + sentry_sdk.capture_exception(exc) error_code = 500 error_description = '%s: %s' % (type(exc).__name__, str(exc)) if get_config()['debug']: diff --git a/swh/web/common/highlightjs.py b/swh/web/common/highlightjs.py --- a/swh/web/common/highlightjs.py +++ b/swh/web/common/highlightjs.py @@ -4,13 +4,13 @@ # See top-level LICENSE file for more information import functools - from typing import Dict from pygments.lexers import ( get_all_lexers, get_lexer_for_filename ) +import sentry_sdk # set of languages ids that can be highlighted # by highlight.js library @@ -317,8 +317,8 @@ # try to find a Pygment lexer try: lexer = get_lexer_for_filename(filename) - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) # if there is a correspondence between the lexer and an hljs # language, return it if lexer and lexer.name in _pygments_lexer_to_hljs_language: diff --git a/swh/web/common/middlewares.py b/swh/web/common/middlewares.py --- a/swh/web/common/middlewares.py +++ b/swh/web/common/middlewares.py @@ -5,6 +5,7 @@ from bs4 import BeautifulSoup from htmlmin import minify +import sentry_sdk class HtmlPrettifyMiddleware(object): @@ -45,8 +46,8 @@ try: minified_html = minify(response.content.decode('utf-8')) response.content = minified_html.encode('utf-8') - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) return response diff --git a/swh/web/common/origin_save.py 
b/swh/web/common/origin_save.py --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -3,19 +3,19 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -import json -import logging - from bisect import bisect_right from datetime import datetime, timezone, timedelta - -import requests +import json +import logging from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ValidationError from django.core.validators import URLValidator from django.utils.html import escape +import requests +import sentry_sdk + from swh.web import config from swh.web.common import service from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc @@ -153,8 +153,8 @@ visit_status = origin_visits[i]['status'] if origin_visits[i]['status'] == 'ongoing': visit_date = None - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) return visit_date, visit_status @@ -522,8 +522,8 @@ task_run['worker'] = task_run_info['hostname'] elif 'host' in task_run_info: task_run['worker'] = task_run_info['host'] - except Exception as e: - logger.warning('Request to Elasticsearch failed\n%s' % str(e)) - pass + except Exception as exc: + logger.warning('Request to Elasticsearch failed\n%s', exc) + sentry_sdk.capture_exception(exc) return task_run diff --git a/swh/web/common/swh_templatetags.py b/swh/web/common/swh_templatetags.py --- a/swh/web/common/swh_templatetags.py +++ b/swh/web/common/swh_templatetags.py @@ -3,6 +3,7 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from inspect import cleandoc import json import re @@ -12,7 +13,8 @@ from docutils.core import publish_parts from docutils.writers.html4css1 import Writer, HTMLTranslator -from inspect import cleandoc + +import sentry_sdk from swh.web.common.origin_save import get_savable_visit_types @@ -71,8 
+73,8 @@ return re.sub(r'([^ <>"]+@[^ <>"]+)', r'\1', text) - except Exception: - pass + except Exception as exc: + sentry_sdk.capture_exception(exc) return text diff --git a/swh/web/common/throttling.py b/swh/web/common/throttling.py --- a/swh/web/common/throttling.py +++ b/swh/web/common/throttling.py @@ -5,7 +5,9 @@ import ipaddress +from django.core.exceptions import ImproperlyConfigured from rest_framework.throttling import ScopedRateThrottle +import sentry_sdk from swh.web.config import get_config @@ -66,31 +68,35 @@ def allow_request(self, request, view): # class based view case if not self.scope: + default_scope = getattr(view, self.scope_attr, None) - # check if there is a specific rate limiting associated - # to the request type - try: + request_allowed = None + if default_scope is not None: + # check if there is a specific rate limiting associated + # to the request type request_scope = default_scope + '_' + request.method.lower() setattr(view, self.scope_attr, request_scope) - request_allowed = \ - super(SwhWebRateThrottle, self).allow_request(request, view) # noqa - setattr(view, self.scope_attr, default_scope) - # use default rate limiting otherwise - except Exception: - setattr(view, self.scope_attr, default_scope) - request_allowed = \ - super(SwhWebRateThrottle, self).allow_request(request, view) # noqa + try: + request_allowed = super().allow_request(request, view) + # use default rate limiting otherwise + except ImproperlyConfigured as exc: + sentry_sdk.capture_exception(exc) + + setattr(view, self.scope_attr, default_scope) + if request_allowed is None: + request_allowed = super().allow_request(request, view) # function based view case else: default_scope = self.scope # check if there is a specific rate limiting associated # to the request type + self.scope = default_scope + '_' + request.method.lower() try: - self.scope = default_scope + '_' + request.method.lower() self.rate = self.get_rate() # use default rate limiting otherwise - except 
Exception: + except ImproperlyConfigured as exc: + sentry_sdk.capture_exception(exc) self.scope = default_scope self.rate = self.get_rate() self.num_requests, self.duration = self.parse_rate(self.rate) diff --git a/swh/web/doc_config.py b/swh/web/doc_config.py --- a/swh/web/doc_config.py +++ b/swh/web/doc_config.py @@ -3,6 +3,7 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +import importlib.util import os from sphinxcontrib import httpdomain @@ -11,10 +12,8 @@ # guard to avoid ImportError when running tests through sbuild # as there is no Debian package built for swh-docs -try: +if importlib.util.find_spec('swh.docs'): from swh.docs.sphinx.conf import setup as orig_setup -except Exception: - pass class SimpleDocumenter(autodoc.FunctionDocumenter): diff --git a/swh/web/gunicorn_config.py b/swh/web/gunicorn_config.py new file mode 100644 --- /dev/null +++ b/swh/web/gunicorn_config.py @@ -0,0 +1,14 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from sentry_sdk.integrations.django import DjangoIntegration + +from swh.core.api.gunicorn_config import * # noqa +from swh.core.api.gunicorn_config import post_fork as _post_fork + + +def post_fork(server, worker): # type: ignore + _post_fork(server, worker, + flask=False, sentry_integrations=[DjangoIntegration()]) diff --git a/swh/web/misc/urls.py b/swh/web/misc/urls.py --- a/swh/web/misc/urls.py +++ b/swh/web/misc/urls.py @@ -6,6 +6,7 @@ import json import requests +import sentry_sdk from django.conf.urls import url, include from django.contrib.staticfiles import finders @@ -33,8 +34,8 @@ try: response = requests.get(url, timeout=5) stat_counters_history = response.text - except Exception: - pass + except Exception as exc: + 
sentry_sdk.capture_exception(exc) json_data = '{"stat_counters": %s, "stat_counters_history": %s}' % ( json.dumps(stat), stat_counters_history) return HttpResponse(json_data, content_type='application/json') diff --git a/swh/web/tests/test_gunicorn_config.py b/swh/web/tests/test_gunicorn_config.py new file mode 100644 --- /dev/null +++ b/swh/web/tests/test_gunicorn_config.py @@ -0,0 +1,55 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +from unittest.mock import patch + +import swh.web.gunicorn_config as gunicorn_config + + +def test_post_fork_default(): + """Sentry is not initialized when no DSN is configured.""" + # drop any ambient DSN so the result does not depend on the caller's env + with patch.dict(os.environ): + os.environ.pop('SWH_SENTRY_DSN', None) + with patch('sentry_sdk.init') as sentry_sdk_init: + gunicorn_config.post_fork(None, None) + + sentry_sdk_init.assert_not_called() + + +def test_post_fork_with_dsn_env(): + """A DSN in the environment initializes sentry with debug off.""" + django_integration = object() # unique object to check for equality + with patch('swh.web.gunicorn_config.DjangoIntegration', + new=lambda: django_integration): + with patch('sentry_sdk.init') as sentry_sdk_init: + with patch.dict(os.environ, {'SWH_SENTRY_DSN': 'test_dsn'}): + # an ambient debug flag would flip the expected debug=False + os.environ.pop('SWH_SENTRY_DEBUG', None) + gunicorn_config.post_fork(None, None) + + sentry_sdk_init.assert_called_once_with( + dsn='test_dsn', + integrations=[django_integration], + debug=False, + ) + + +def test_post_fork_debug(): + """SWH_SENTRY_DEBUG=1 turns on the sentry debug flag.""" + django_integration = object() # unique object to check for equality + with patch('swh.web.gunicorn_config.DjangoIntegration', + new=lambda: django_integration): + with patch('sentry_sdk.init') as sentry_sdk_init: + with patch.dict(os.environ, {'SWH_SENTRY_DSN': 'test_dsn', + 'SWH_SENTRY_DEBUG': '1'}): +
gunicorn_config.post_fork(None, None) + + sentry_sdk_init.assert_called_once_with( + dsn='test_dsn', + integrations=[django_integration], + debug=True, + )