diff --git a/.gitignore b/.gitignore index f124186a..185099d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,39 +1,40 @@ *.pyc *.sw? *~ \#* .\#* /.coverage /.coverage.* .eggs/ resources/test/ __pycache__ version.txt swh.web.egg-info docs/build/ docs/uri-scheme.md docs/dev-info.md *.sqlite3 .vscode/ .directory node_modules/ swh/web/static/*.* swh/web/static/js/ swh/web/static/css/ swh/web/static/fonts/ swh/web/static/jssources/ swh/web/static/img/thirdParty/ .cache-loader/ build/ dist/ .hypothesis .cache .pytest_cache .tox/ +.mypy_cache/ debian/ package-lock.json yarn-error.log cypress/mochawesome/ .nyc_output/ cypress/coverage/ cypress/fixtures/source*.json diff --git a/MANIFEST.in b/MANIFEST.in index 1f4d4094..aef7206e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,15 +1,16 @@ include Makefile include pytest.ini include README.md include requirements.txt include requirements-swh.txt include requirements-test.txt include tox.ini include version.txt +recursive-include swh py.typed recursive-include swh/web/assets * recursive-include swh/web/static * recursive-include swh/web/templates * recursive-include swh/web/tests/resources * include package.json include yarn.lock diff --git a/Makefile.local b/Makefile.local index e098abb8..986c8355 100644 --- a/Makefile.local +++ b/Makefile.local @@ -1,89 +1,97 @@ TEST_DIRS := ./swh/web/tests TESTFLAGS = --hypothesis-profile=swh-web-fast TESTFULL_FLAGS = --hypothesis-profile=swh-web YARN ?= yarn yarn-install: package.json $(YARN) install .PHONY: build-webpack-dev build-webpack-dev: yarn-install $(YARN) build-dev .PHONY: build-webpack-test build-webpack-test: yarn-install $(YARN) build-test .PHONY: build-webpack-dev-no-verbose build-webpack-dev-no-verbose: yarn-install $(YARN) build-dev >/dev/null .PHONY: build-webpack-prod build-webpack-prod: yarn-install $(YARN) build .PHONY: run-migrations run-migrations: python3 swh/web/manage.py migrate --settings=swh.web.settings.development -v0 2>/dev/null python3 swh/web/manage.py createcachetable --settings=swh.web.settings.development -v0 2>/dev/null .PHONY: run-migrations-prod run-migrations-prod: django-admin migrate --settings=swh.web.settings.production -v0 2>/dev/null django-admin createcachetable --settings=swh.web.settings.production -v0 2>/dev/null .PHONY: run-migrations-test run-migrations-test: rm -f swh/web/settings/testdb.sqlite3 django-admin migrate --settings=swh.web.settings.tests -v0 2>/dev/null django-admin createcachetable --settings=swh.web.settings.tests -v0 2>/dev/null cat swh/web/tests/create_test_admin.py | django-admin shell --settings=swh.web.settings.tests .PHONY: clear-memcached clear-memcached: echo "flush_all" | nc -q 2 localhost 11211 2>/dev/null run-django-webpack-devserver: run-migrations yarn-install bash -c "trap 'trap - SIGINT SIGTERM ERR; kill %1' SIGINT SIGTERM ERR; $(YARN) start-dev & sleep 10 && cd swh/web && python3 manage.py runserver --nostatic --settings=swh.web.settings.development" run-django-webpack-dev: build-webpack-dev run-migrations python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.development run-django-webpack-prod: build-webpack-prod run-migrations-prod clear-memcached python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-django-server-dev: run-migrations python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.development run-django-server-prod: run-migrations-prod clear-memcached python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-gunicorn-server: run-migrations clear-memcached DJANGO_SETTINGS_MODULE=swh.web.settings.production \ gunicorn3 -b 127.0.0.1:5004 'django.core.wsgi:get_wsgi_application()' run-django-webpack-memory-storages: build-webpack-dev run-migrations python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.tests test-full: $(TEST) $(TESTFULL_FLAGS) $(TEST_DIRS) .PHONY: test-frontend-cmd test-frontend-cmd: build-webpack-test run-migrations-test python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.tests & sleep 10 && $(YARN) run cypress run ; pkill -P $$! ; $(YARN) run mochawesome test-frontend: export CYPRESS_SKIP_SLOW_TESTS=1 test-frontend: test-frontend-cmd test-frontend-full: export CYPRESS_SKIP_SLOW_TESTS=0 test-frontend-full: test-frontend-cmd .PHONY: test-frontend-ui-cmd test-frontend-ui-cmd: build-webpack-test run-migrations-test bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT; jobs -p | head -1 | xargs pkill -P' SIGINT SIGTERM ERR EXIT; python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.tests & sleep 10 && $(YARN) run cypress open" test-frontend-ui: export CYPRESS_SKIP_SLOW_TESTS=1 test-frontend-ui: test-frontend-ui-cmd test-frontend-full-ui: export CYPRESS_SKIP_SLOW_TESTS=0 test-frontend-full-ui: test-frontend-ui-cmd + + +# Override default rule to make sure DJANGO env var is properly set. It +# *should* work without any override thanks to the mypy django-stubs plugin, +# but it currently doesn't; see +# https://github.com/typeddjango/django-stubs/issues/166 +typecheck: + DJANGO_SETTINGS_MODULE=swh.web.settings.development $(MYPY) $(MYPYFLAGS) swh diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..5a77a9c0 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,49 @@ +[mypy] +namespace_packages = True +warn_unused_ignores = True +# support for django magic: https://github.com/typeddjango/django-stubs +plugins = mypy_django_plugin.main + +[mypy.plugins.django-stubs] +django_settings_module = swh.web.settings.development + +# 3rd party libraries without stubs (yet) + +[mypy-bs4.*] +ignore_missing_imports = True + +[mypy-django_js_reverse.*] +ignore_missing_imports = True + +[mypy-htmlmin.*] +ignore_missing_imports = True + +[mypy-magic.*] +ignore_missing_imports = True + +[mypy-pkg_resources.*] +ignore_missing_imports = True + +[mypy-pygments.*] +ignore_missing_imports = True + +[mypy-pypandoc.*] +ignore_missing_imports = True + +[mypy-pytest.*] +ignore_missing_imports = True + +[mypy-rest_framework.*] +ignore_missing_imports = True + +[mypy-requests_mock.*] +ignore_missing_imports = True + +[mypy-sphinx.*] +ignore_missing_imports = True + +[mypy-sphinxcontrib.*] +ignore_missing_imports = True + +[mypy-swh.docs.*] +ignore_missing_imports = True diff --git a/requirements-test.txt b/requirements-test.txt index 489af9b0..576fe193 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,7 +1,8 @@ hypothesis pytest pytest-django pytest-mock +django-stubs requests-mock swh.core[http] >= 0.0.61 swh.loader.git >= 0.0.47 diff --git a/swh/__init__.py b/swh/__init__.py index 69e3be50..f14e1965 100644 --- a/swh/__init__.py +++ b/swh/__init__.py @@ -1 +1,4 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +from pkgutil import extend_path +from typing import Iterable + +__path__ = extend_path(__path__, __name__) # type: Iterable[str] diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py index df66eda9..7cb84f98 100644 --- a/swh/web/admin/deposit.py +++ b/swh/web/admin/deposit.py @@ -1,93 +1,93 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json import requests from django.core.cache import cache from django.conf import settings from django.contrib.admin.views.decorators import staff_member_required from django.core.paginator import Paginator from django.http import HttpResponse from django.shortcuts import render from requests.auth import HTTPBasicAuth from swh.web.admin.adminurls import admin_route from swh.web.config import get_config config = get_config()['deposit'] @admin_route(r'deposit/', view_name='admin-deposit') -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save(request): return render(request, 'admin/deposit.html') @admin_route(r'deposit/list/', view_name='admin-deposit-list') -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_deposit_list(request): table_data = {} table_data['draw'] = int(request.GET['draw']) deposits_list_url = config['private_api_url'] + 'deposits' deposits_list_auth = HTTPBasicAuth(config['private_api_user'], config['private_api_password']) try: nb_deposits = requests.get('%s?page_size=1' % deposits_list_url, auth=deposits_list_auth, timeout=30).json()['count'] deposits_data = cache.get('swh-deposit-list') if not deposits_data or deposits_data['count'] != nb_deposits: deposits_data = requests.get('%s?page_size=%s' % (deposits_list_url, nb_deposits), auth=deposits_list_auth, timeout=30).json() cache.set('swh-deposit-list', deposits_data) deposits = deposits_data['results'] search_value = request.GET['search[value]'] if search_value: deposits = \ [d for d in deposits if any(search_value.lower() in val for val in [str(v).lower() for v in d.values()])] column_order = request.GET['order[0][column]'] field_order = request.GET['columns[%s][name]' % column_order] order_dir = request.GET['order[0][dir]'] deposits = sorted(deposits, key=lambda d: d[field_order] or '') if order_dir == 'desc': deposits = list(reversed(deposits)) length = int(request.GET['length']) page = int(request.GET['start']) / length + 1 paginator = Paginator(deposits, length) data = paginator.page(page).object_list table_data['recordsTotal'] = deposits_data['count'] table_data['recordsFiltered'] = len(deposits) table_data['data'] = [{ 'id': d['id'], 'external_id': d['external_id'], 'reception_date': d['reception_date'], 'status': d['status'], 'status_detail': d['status_detail'], 'swh_anchor_id': d['swh_anchor_id'], 'swh_anchor_id_context': d['swh_anchor_id_context'], 'swh_id': d['swh_id'], 'swh_id_context': d['swh_id_context'] } for d in data] except Exception: table_data['error'] = ('An error occurred while retrieving ' 'the list of deposits !') return HttpResponse(json.dumps(table_data), content_type='application/json') diff --git a/swh/web/admin/origin_save.py b/swh/web/admin/origin_save.py index 965a4600..7317e7c0 100644 --- a/swh/web/admin/origin_save.py +++ b/swh/web/admin/origin_save.py @@ -1,206 +1,206 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.conf import settings from django.contrib.admin.views.decorators import staff_member_required from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator from django.http import HttpResponse from django.shortcuts import render from django.views.decorators.http import require_POST from swh.web.admin.adminurls import admin_route from swh.web.common.models import ( SaveAuthorizedOrigin, SaveUnauthorizedOrigin, SaveOriginRequest ) from swh.web.common.origin_save import ( create_save_origin_request, get_save_origin_task_info, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED ) @admin_route(r'origin/save/', view_name='admin-origin-save') -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save(request): return render(request, 'admin/origin-save.html') def _datatables_origin_urls_response(request, urls_query_set): search_value = request.GET['search[value]'] if search_value: urls_query_set = urls_query_set.filter(url__icontains=search_value) column_order = request.GET['order[0][column]'] field_order = request.GET['columns[%s][name]' % column_order] order_dir = request.GET['order[0][dir]'] if order_dir == 'desc': field_order = '-' + field_order urls_query_set = urls_query_set.order_by(field_order) table_data = {} table_data['draw'] = int(request.GET['draw']) table_data['recordsTotal'] = urls_query_set.count() table_data['recordsFiltered'] = urls_query_set.count() length = int(request.GET['length']) page = int(request.GET['start']) / length + 1 paginator = Paginator(urls_query_set, length) urls_query_set = paginator.page(page).object_list table_data['data'] = [{'url': u.url} for u in urls_query_set] table_data_json = json.dumps(table_data, separators=(',', ': ')) return HttpResponse(table_data_json, content_type='application/json') @admin_route(r'origin/save/authorized_urls/list/', view_name='admin-origin-save-authorized-urls-list') @staff_member_required def _admin_origin_save_authorized_urls_list(request): authorized_urls = SaveAuthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, authorized_urls) @admin_route(r'origin/save/authorized_urls/add/(?P.+)/', view_name='admin-origin-save-add-authorized-url') @require_POST -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_add_authorized_url(request, origin_url): try: SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: # add the new authorized url SaveAuthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = \ SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING) # create origin save tasks for previously pending requests for psr in pending_save_requests: create_save_origin_request(psr.visit_type, psr.origin_url) status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route(r'origin/save/authorized_urls/remove/(?P.+)/', view_name='admin-origin-save-remove-authorized-url') @require_POST -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_remove_authorized_url(request, origin_url): try: entry = SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route(r'origin/save/unauthorized_urls/list/', view_name='admin-origin-save-unauthorized-urls-list') -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_unauthorized_urls_list(request): unauthorized_urls = SaveUnauthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, unauthorized_urls) @admin_route(r'origin/save/unauthorized_urls/add/(?P.+)/', view_name='admin-origin-save-add-unauthorized-url') @require_POST -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_add_unauthorized_url(request, origin_url): try: SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = \ SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING) # mark pending requests as rejected for psr in pending_save_requests: psr.status = SAVE_REQUEST_REJECTED psr.save() status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route(r'origin/save/unauthorized_urls/remove/(?P.+)/', view_name='admin-origin-save-remove-unauthorized-url') @require_POST -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_remove_unauthorized_url(request, origin_url): try: entry = SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route(r'origin/save/request/accept/(?P.+)/url/(?P.+)/', # noqa view_name='admin-origin-save-request-accept') @require_POST -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_accept(request, visit_type, origin_url): try: SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveAuthorizedOrigin.objects.create(url=origin_url) create_save_origin_request(visit_type, origin_url) return HttpResponse(status=200) @admin_route(r'origin/save/request/reject/(?P.+)/url/(?P.+)/', # noqa view_name='admin-origin-save-request-reject') @require_POST -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_reject(request, visit_type, origin_url): try: SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) sor = SaveOriginRequest.objects.get(visit_type=visit_type, origin_url=origin_url, status=SAVE_REQUEST_PENDING) sor.status = SAVE_REQUEST_REJECTED sor.save() return HttpResponse(status=200) @admin_route(r'origin/save/request/remove/(?P.+)/', view_name='admin-origin-save-request-remove') @require_POST -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_remove(request, sor_id): try: entry = SaveOriginRequest.objects.get(id=sor_id) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route(r'origin/save/task/info/(?P.+)/', view_name='admin-origin-save-task-info') -@staff_member_required(login_url=settings.LOGIN_URL) +@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _save_origin_task_info(request, save_request_id): request_info = get_save_origin_task_info(save_request_id) for date_field in ('scheduled', 'started', 'ended'): if date_field in request_info and request_info[date_field] is not None: request_info[date_field] = request_info[date_field].isoformat() return HttpResponse(json.dumps(request_info), content_type='application/json') diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py index c356c9cd..8694115f 100644 --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -1,85 +1,86 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools +from typing import Dict + from rest_framework.decorators import api_view from swh.web.common.urlsindex import UrlsIndex from swh.web.common import throttling class APIUrls(UrlsIndex): """ Class to manage API documentation URLs. - Indexes all routes documented using apidoc's decorators. - Tracks endpoint/request processing method relationships for use in generating related urls in API documentation """ - _apidoc_routes = {} - _method_endpoints = {} + _apidoc_routes = {} # type: Dict[str, Dict[str, str]] scope = 'api' @classmethod def get_app_endpoints(cls): return cls._apidoc_routes @classmethod def add_route(cls, route, docstring, **kwargs): """ Add a route to the self-documenting API reference """ route_view_name = 'api-1-%s' % route[1:-1].replace('/', '-') if route not in cls._apidoc_routes: d = {'docstring': docstring, 'route_view_name': route_view_name} for k, v in kwargs.items(): d[k] = v cls._apidoc_routes[route] = d def api_route(url_pattern=None, view_name=None, methods=['GET', 'HEAD', 'OPTIONS'], throttle_scope='swh_api', api_version='1', checksum_args=None): """ Decorator to ease the registration of an API endpoint using the Django REST Framework. Args: url_pattern: the url pattern used by DRF to identify the API route view_name: the name of the API view associated to the route used to reverse the url methods: array of HTTP methods supported by the API route """ url_pattern = '^' + api_version + url_pattern + '$' def decorator(f): # create a DRF view from the wrapped function @api_view(methods) @throttling.throttle_scope(throttle_scope) @functools.wraps(f) def api_view_f(*args, **kwargs): return f(*args, **kwargs) # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = methods # register the route and its view in the endpoints index APIUrls.add_url_pattern(url_pattern, api_view_f, view_name) if checksum_args: APIUrls.add_redirect_for_checksum_args(view_name, [url_pattern], checksum_args) return f return decorator diff --git a/swh/web/common/highlightjs.py b/swh/web/common/highlightjs.py index 6c57a556..ad458149 100644 --- a/swh/web/common/highlightjs.py +++ b/swh/web/common/highlightjs.py @@ -1,359 +1,361 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import functools +from typing import Dict + from pygments.lexers import ( get_all_lexers, get_lexer_for_filename ) # set of languages ids that can be highlighted # by highlight.js library _hljs_languages = set([ '1c', 'abnf', 'accesslog', 'actionscript', 'ada', 'angelscript', 'apache', 'applescript', 'arcade', 'arduino', 'armasm', 'asciidoc', 'aspectj', 'autohotkey', 'autoit', 'avrasm', 'awk', 'axapta', 'bash', 'basic', 'bnf', 'brainfuck', 'cal', 'capnproto', 'ceylon', 'clean', 'clojure', 'clojure-repl', 'cmake', 'coffeescript', 'coq', 'cos', 'cpp', 'crmsh', 'crystal', 'cs', 'csp', 'css', 'd', 'dart', 'delphi', 'diff', 'django', 'dns', 'dockerfile', 'dos', 'dsconfig', 'dts', 'dust', 'ebnf', 'elixir', 'elm', 'erb', 'erlang', 'erlang-repl', 'excel', 'fix', 'flix', 'fortran', 'fsharp', 'gams', 'gauss', 'gcode', 'gherkin', 'glsl', 'gml', 'go', 'golo', 'gradle', 'groovy', 'haml', 'handlebars', 'haskell', 'haxe', 'hsp', 'htmlbars', 'http', 'hy', 'inform7', 'ini', 'irpf90', 'isbl', 'java', 'javascript', 'jboss-cli', 'json', 'julia', 'julia-repl', 'kotlin', 'lasso', 'ldif', 'leaf', 'less', 'lisp', 'livecodeserver', 'livescript', 'llvm', 'lsl', 'lua', 'makefile', 'markdown', 'mathematica', 'matlab', 'maxima', 'mel', 'mercury', 'mipsasm', 'mizar', 'mojolicious', 'monkey', 'moonscript', 'n1ql', 'nginx', 'nimrod', 'nix', 'nsis', 'objectivec', 'ocaml', 'openscad', 'oxygene', 'parser3', 'perl', 'pf', 'pgsql', 'php', 'plaintext', 'pony', 'powershell', 'processing', 'profile', 'prolog', 'properties', 'protobuf', 'puppet', 'purebasic', 'python', 'q', 'qml', 'r', 'reasonml', 'rib', 'roboconf', 'routeros', 'rsl', 'ruby', 'ruleslanguage', 'rust', 'sas', 'scala', 'scheme', 'scilab', 'scss', 'shell', 'smali', 'smalltalk', 'sml', 'sqf', 'sql', 'stan', 'stata', 'step21', 'stylus', 'subunit', 'swift', 'taggerscript', 'tap', 'tcl', 'tex', 'thrift', 'tp', 'twig', 'typescript', 'vala', 'vbnet', 'vbscript', 'vbscript-html', 'verilog', 'vhdl', 'vim', 'x86asm', 'xl', 'xml', 'xquery', 'yaml', 'zephir' ]) # languages aliases defined in highlight.js _hljs_languages_aliases = { 'ado': 'stata', 'adoc': 'asciidoc', 'ahk': 'autohotkey', 'aj': 'aspectj', 'apacheconf': 'apache', 'arm': 'armasm', 'as': 'actionscript', 'asc': 'asciidoc', 'atom': 'xml', 'bas': 'basic', 'bat': 'dos', 'bf': 'brainfuck', 'bind': 'dns', 'bsl': '1c', 'c-al': 'cal', 'c': 'cpp', 'c++': 'cpp', 'capnp': 'capnproto', 'cc': 'cpp', 'clj': 'clojure', 'cls': 'cos', 'cmake.in': 'cmake', 'cmd': 'dos', 'coffee': 'coffeescript', 'console': 'shell', 'cr': 'crystal', 'craftcms': 'twig', 'crm': 'crmsh', 'csharp': 'cs', 'cson': 'coffeescript', 'dcl': 'clean', 'dfm': 'delphi', 'do': 'stata', 'docker': 'dockerfile', 'dpr': 'delphi', 'dst': 'dust', 'dtsi': 'dts', 'ep': 'mojolicious', 'erl': 'erlang', 'ex': 'elixir', 'exs': 'elixir', 'f90': 'fortran', 'f95': 'fortran', 'feature': 'gherkin', 'freepascal': 'delphi', 'fs': 'fsharp', 'fsx': 'fsharp', 'gemspec': 'ruby', 'GML': 'gml', 'gms': 'gams', 'golang': 'go', 'graph': 'roboconf', 'gss': 'gauss', 'gyp': 'python', 'h': 'cpp', 'h++': 'cpp', 'hbs': 'handlebars', 'hpp': 'cpp', 'hs': 'haskell', 'html': 'xml', 'html.handlebars': 'handlebars', 'html.hbs': 'handlebars', 'https': 'http', 'hx': 'haxe', 'hylang': 'hy', 'i7': 'inform7', 'i7x': 'inform7', 'iced': 'coffeescript', 'icl': 'clean', 'ino': 'arduino', 'instances': 'roboconf', 'ipynb': 'json', 'irb': 'ruby', 'jinja': 'django', 'js': 'javascript', 'jsp': 'java', 'jsx': 'javascript', 'k': 'q', 'kdb': 'q', 'kt': 'kotlin', 'lassoscript': 'lasso', 'lazarus': 'delphi', 'lc': 'livecode', 'lfm': 'delphi', 'll': 'llvm', 'lpr': 'delphi', 'ls': 'livescript', 'm': 'matlab', 'mak': 'makefile', 'md': 'markdown', 'mikrotik': 'routeros', 'mips': 'mipsasm', 'mk': 'monkey', 'mkd': 'markdown', 'mkdown': 'markdown', 'ml': 'ocaml', 'mli': 'ocaml', 'mm': 'objectivec', 'mma': 'mathematica', 'moo': 'mercury', 'moon': 'moonscript', 'nav': 'cal', 'nb': 'mathematica', 'nc': 'gcode', 'nginxconf': 'nginx', 'ni': 'inform7', 'nim': 'nimrod', 'nixos': 'nix', 'nsi': 'nsis', 'obj-c': 'objectivec', 'objc': 'objectivec', 'osascript': 'applescript', 'osl': 'rsl', 'p': 'parser3', 'p21': 'step21', 'pas': 'delphi', 'pascal': 'delphi', 'patch': 'diff', 'pb': 'purebasic', 'pbi': 'purebasic', 'pcmk': 'crmsh', 'pde': 'processing', 'pf.conf': 'pf', 'php3': 'php', 'php4': 'php', 'php5': 'php', 'php6': 'php', 'php7': 'php', 'pl': 'perl', 'plist': 'xml', 'pm': 'perl', 'podspec': 'ruby', 'postgres': 'pgsql', 'postgresql': 'pgsql', 'pp': 'puppet', 'proto': 'protobuf', 'ps': 'powershell', 'ps1': 'powershell', 'psd1': 'powershell', 'psm1': 'powershell', 'py': 'python', 'qt': 'qml', 'rb': 'ruby', 're': 'reasonml', 'rei': 'reasonml', 'rs': 'rust', 'rsc': 'routeros', 'rss': 'xml', 'rst': 'nohighlight', 's': 'armasm', 'SAS': 'sas', 'scad': 'openscad', 'sci': 'scilab', 'scm': 'scheme', 'sh': 'bash', 'sig': 'sml', 'sl': 'rsl', 'st': 'smalltalk', 'step': 'step21', 'stp': 'step21', 'styl': 'stylus', 'sv': 'verilog', 'svh': 'verilog', 'tao': 'xl', 'thor': 'ruby', 'tk': 'tcl', 'toml': 'ini', 'ts': 'typescript', 'txt': 'nohighlight', 'v': 'coq', 'vb': 'vbnet', 'vbs': 'vbscript', 'vhd': 'vhdl', 'wildfly-cli': 'jboss-cli', 'wl': 'mathematica', 'wls': 'mathematica', 'xhtml': 'xml', 'xjb': 'xml', 'xls': 'excel', 'xlsx': 'excel', 'xpath': 'xquery', 'xpo': 'axapta', 'xpp': 'axapta', 'xq': 'xquery', 'xqy': 'xquery', 'xsd': 'xml', 'xsl': 'xml', 'YAML': 'yaml', 'yml': 'yaml', 'zep': 'zephir', 'zone': 'dns', 'zsh': 'bash' } # dictionary mapping pygment lexers to hljs languages -_pygments_lexer_to_hljs_language = {} +_pygments_lexer_to_hljs_language = {} # type: Dict[str, str] # dictionary mapping mime types to hljs languages _mime_type_to_hljs_language = { 'text/x-c': 'cpp', 'text/x-c++': 'cpp', 'text/x-msdos-batch': 'dos', 'text/x-lisp': 'lisp', 'text/x-shellscript': 'bash', } # dictionary mapping filenames to hljs languages _filename_to_hljs_language = { 'cmakelists.txt': 'cmake', '.htaccess': 'apache', 'httpd.conf': 'apache', 'access.log': 'accesslog', 'nginx.log': 'accesslog', 'resolv.conf': 'dns', 'dockerfile': 'docker', 'nginx.conf': 'nginx', 'pf.conf': 'pf' } # function to fill the above dictionaries def _init_pygments_to_hljs_map(): if len(_pygments_lexer_to_hljs_language) == 0: for lexer in get_all_lexers(): lexer_name = lexer[0] lang_aliases = lexer[1] lang_mime_types = lexer[3] lang = None for lang_alias in lang_aliases: if lang_alias in _hljs_languages: lang = lang_alias _pygments_lexer_to_hljs_language[lexer_name] = lang_alias break if lang: for lang_mime_type in lang_mime_types: _mime_type_to_hljs_language[lang_mime_type] = lang def get_hljs_language_from_filename(filename): """Function that tries to associate a language supported by highlight.js from a filename. Args: filename: input filename Returns: highlight.js language id or None if no correspondence has been found """ _init_pygments_to_hljs_map() if filename: filename_lower = filename.lower() if filename_lower in _filename_to_hljs_language: return _filename_to_hljs_language[filename_lower] if filename_lower in _hljs_languages: return filename_lower exts = filename_lower.split('.') # check if file extension matches an hljs language # also handle .ext.in cases for ext in reversed(exts[-2:]): if ext in _hljs_languages: return ext if ext in _hljs_languages_aliases: return _hljs_languages_aliases[ext] # otherwise use Pygments language database lexer = None # try to find a Pygment lexer try: lexer = get_lexer_for_filename(filename) except Exception: pass # if there is a correspondence between the lexer and an hljs # language, return it if lexer and lexer.name in _pygments_lexer_to_hljs_language: return _pygments_lexer_to_hljs_language[lexer.name] # otherwise, try to find a match between the file extensions # associated to the lexer and the hljs language aliases if lexer: exts = [ext.replace('*.', '') for ext in lexer.filenames] for ext in exts: if ext in _hljs_languages_aliases: return _hljs_languages_aliases[ext] return None def get_hljs_language_from_mime_type(mime_type): """Function that tries to associate a language supported by highlight.js from a mime type. Args: mime_type: input mime type Returns: highlight.js language id or None if no correspondence has been found """ _init_pygments_to_hljs_map() if mime_type and mime_type in _mime_type_to_hljs_language: return _mime_type_to_hljs_language[mime_type] return None @functools.lru_cache() def get_supported_languages(): """ Return the list of programming languages that can be highlighted using the highlight.js library. Returns: List[str]: the list of supported languages """ return sorted(list(_hljs_languages)) diff --git a/swh/web/common/migrations/0001_initial.py b/swh/web/common/migrations/0001_initial.py index a112f750..b81f050f 100644 --- a/swh/web/common/migrations/0001_initial.py +++ b/swh/web/common/migrations/0001_initial.py @@ -1,75 +1,72 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information # flake8: noqa from __future__ import unicode_literals from django.db import migrations, models _authorized_origins = [ 'https://github.com/', 'https://gitlab.com/', 'https://bitbucket.org/', 'https://git.code.sf.net/', 'http://git.code.sf.net/', 'https://hg.code.sf.net/', 'http://hg.code.sf.net/', 'https://svn.code.sf.net/', 'http://svn.code.sf.net/' ] def _populate_save_authorized_origins(apps, schema_editor): SaveAuthorizedOrigin = apps.get_model('swh.web.common', 'SaveAuthorizedOrigin') for origin_url in _authorized_origins: SaveAuthorizedOrigin.objects.create(url=origin_url) class Migration(migrations.Migration): initial = True - dependencies = [ - ] - operations = [ migrations.CreateModel( name='SaveAuthorizedOrigin', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('url', models.CharField(max_length=200)), ], options={ 'db_table': 'save_authorized_origin', }, ), migrations.CreateModel( name='SaveOriginRequest', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), ('request_date', models.DateTimeField(auto_now_add=True)), ('origin_type', models.CharField(max_length=200)), ('origin_url', models.CharField(max_length=200)), ('status', models.TextField(choices=[('accepted', 'accepted'), ('rejected', 'rejected'), ('pending', 'pending')], default='pending')), ('loading_task_id', models.IntegerField(default=-1)), ], options={ 'db_table': 'save_origin_request', 'ordering': ['-id'], }, ), migrations.CreateModel( name='SaveUnauthorizedOrigin', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('url', models.CharField(max_length=200)), ], options={ 'db_table': 'save_unauthorized_origin', }, ), migrations.RunPython(_populate_save_authorized_origins) ] diff --git a/swh/web/common/urlsindex.py b/swh/web/common/urlsindex.py index 0c9649b7..08000426 100644 --- a/swh/web/common/urlsindex.py +++ b/swh/web/common/urlsindex.py @@ -1,76 +1,80 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Dict, List + +import django.urls + from django.conf.urls import url from django.shortcuts import redirect class UrlsIndex(object): """ Simple helper class for centralizing url patterns of a Django web application. Derived classes should override the 'scope' class attribute otherwise all declared patterns will be grouped under the default one. """ - _urlpatterns = {} + _urlpatterns = {} # type: Dict[str, List[django.urls.URLPattern]] scope = 'default' @classmethod def add_url_pattern(cls, url_pattern, view, view_name=None): """ Class method that adds an url pattern to the current scope. Args: url_pattern: regex describing a Django url view: function implementing the Django view view_name: name of the view used to reverse the url """ if cls.scope not in cls._urlpatterns: cls._urlpatterns[cls.scope] = [] if view_name: cls._urlpatterns[cls.scope].append(url(url_pattern, view, name=view_name)) else: cls._urlpatterns[cls.scope].append(url(url_pattern, view)) @classmethod def add_redirect_for_checksum_args(cls, view_name, url_patterns, checksum_args): """ Class method that redirects to view with lowercase checksums when upper/mixed case checksums are passed as url arguments. Args: view_name (str): name of the view to redirect requests url_patterns (List[str]): regexps describing the view urls checksum_args (List[str]): url argument names corresponding to checksum values """ new_view_name = view_name+'-uppercase-checksum' for url_pattern in url_patterns: url_pattern_upper = url_pattern.replace('[0-9a-f]', '[0-9a-fA-F]') def view_redirect(request, *args, **kwargs): for checksum_arg in checksum_args: checksum_upper = kwargs[checksum_arg] kwargs[checksum_arg] = checksum_upper.lower() return redirect(view_name, *args, **kwargs) cls.add_url_pattern(url_pattern_upper, view_redirect, new_view_name) @classmethod def get_url_patterns(cls): """ Class method that returns the list of url pattern associated to the current scope. Returns: The list of url patterns associated to the current scope """ return cls._urlpatterns[cls.scope] diff --git a/swh/web/config.py b/swh/web/config.py index 19b30826..c506ef76 100644 --- a/swh/web/config.py +++ b/swh/web/config.py @@ -1,158 +1,160 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os +from typing import Any, Dict + from swh.core import config from swh.indexer.storage import get_indexer_storage from swh.scheduler import get_scheduler from swh.storage import get_storage from swh.vault import get_vault from swh.web import settings SETTINGS_DIR = os.path.dirname(settings.__file__) DEFAULT_CONFIG = { 'allowed_hosts': ('list', []), 'storage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:5002/', 'timeout': 10, }, }), 'indexer_storage': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:5007/', 'timeout': 1, } }), 'log_dir': ('string', '/tmp/swh/log'), 'debug': ('bool', False), 'serve_assets': ('bool', False), 'host': ('string', '127.0.0.1'), 'port': ('int', 5004), 'secret_key': ('string', 'development key'), # do not display code highlighting for content > 1MB 'content_display_max_size': ('int', 5 * 1024 * 1024), 'snapshot_content_max_size': ('int', 1000), 'throttling': ('dict', { 'cache_uri': None, # production: memcached as cache (127.0.0.1:11211) # development: in-memory cache so None 'scopes': { 'swh_api': { 'limiter_rate': { 'default': '120/h' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_vault_cooking': { 'limiter_rate': { 'default': '120/h', 'GET': '60/m' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_save_origin': { 'limiter_rate': { 'default': '120/h', 'POST': '10/h' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_api_origin_visit_latest': { 'limiter_rate': { 'default': '700/m' }, 'exempted_networks': ['127.0.0.0/8'], }, } }), 'vault': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:5005/', } }), 'scheduler': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:5008/' } }), 'development_db': ('string', os.path.join(SETTINGS_DIR, 'db.sqlite3')), 'test_db': ('string', os.path.join(SETTINGS_DIR, 'testdb.sqlite3')), 'production_db': ('string', '/var/lib/swh/web.sqlite3'), 'deposit': ('dict', { 'private_api_url': 'https://deposit.softwareheritage.org/1/private/', 'private_api_user': 'swhworker', 'private_api_password': '' }), 'coverage_count_origins': ('bool', False), 'e2e_tests_mode': ('bool', False), 'es_workers_index_url': ('string', ''), 'history_counters_url': ('string', 'https://stats.export.softwareheritage.org/history_counters.json'), # noqa } -swhweb_config = {} +swhweb_config = {} # type: Dict[str, Any] def get_config(config_file='web/web'): """Read the configuration file `config_file`. If an environment variable SWH_CONFIG_FILENAME is defined, this takes precedence over the config_file parameter. In any case, update the app with parameters (secret_key, conf) and return the parsed configuration as a dict. If no configuration file is provided, return a default configuration. """ if not swhweb_config: config_filename = os.environ.get('SWH_CONFIG_FILENAME') if config_filename: config_file = config_filename cfg = config.load_named_config(config_file, DEFAULT_CONFIG) swhweb_config.update(cfg) config.prepare_folders(swhweb_config, 'log_dir') swhweb_config['storage'] = get_storage(**swhweb_config['storage']) swhweb_config['vault'] = get_vault(**swhweb_config['vault']) swhweb_config['indexer_storage'] = \ get_indexer_storage(**swhweb_config['indexer_storage']) swhweb_config['scheduler'] = get_scheduler( **swhweb_config['scheduler']) return swhweb_config def storage(): """Return the current application's storage. """ return get_config()['storage'] def vault(): """Return the current application's vault. """ return get_config()['vault'] def indexer_storage(): """Return the current application's indexer storage. """ return get_config()['indexer_storage'] def scheduler(): """Return the current application's scheduler. """ return get_config()['scheduler'] diff --git a/swh/web/py.typed b/swh/web/py.typed new file mode 100644 index 00000000..1242d432 --- /dev/null +++ b/swh/web/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. diff --git a/swh/web/settings/tests.py b/swh/web/settings/tests.py index a91d22a9..12d2d4bf 100644 --- a/swh/web/settings/tests.py +++ b/swh/web/settings/tests.py @@ -1,103 +1,103 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django tests settings for swh-web. """ import sys from swh.web.config import get_config scope1_limiter_rate = 3 scope1_limiter_rate_post = 1 scope2_limiter_rate = 5 scope2_limiter_rate_post = 2 scope3_limiter_rate = 1 scope3_limiter_rate_post = 1 save_origin_rate_post = 10 swh_web_config = get_config() swh_web_config.update({ 'debug': False, 'secret_key': 'test', 'history_counters_url': '', 'throttling': { 'cache_uri': None, 'scopes': { 'swh_api': { 'limiter_rate': { 'default': '60/min' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_api_origin_visit_latest': { 'limiter_rate': { 'default': '6000/min' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_vault_cooking': { 'limiter_rate': { 'default': '120/h', 'GET': '60/m' }, 'exempted_networks': ['127.0.0.0/8'] }, 'swh_save_origin': { 'limiter_rate': { 'default': '120/h', 'POST': '%s/h' % save_origin_rate_post, } }, 'scope1': { 'limiter_rate': { 'default': '%s/min' % scope1_limiter_rate, 'POST': '%s/min' % scope1_limiter_rate_post, } }, 'scope2': { 'limiter_rate': { 'default': '%s/min' % scope2_limiter_rate, 'POST': '%s/min' % scope2_limiter_rate_post } }, 'scope3': { 'limiter_rate': { 'default': '%s/min' % scope3_limiter_rate, 'POST': '%s/min' % scope3_limiter_rate_post }, 'exempted_networks': ['127.0.0.0/8'] } } } }) from .common import * # noqa from .common import ALLOWED_HOSTS, LOGGING # noqa DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': swh_web_config['test_db'], } } # when not running unit tests, make the webapp fetch data from memory storages if 'pytest' not in sys.argv[0]: swh_web_config.update({ 'debug': True, 'e2e_tests_mode': True }) from swh.web.tests.data import get_tests_data, override_storages # noqa test_data = get_tests_data() override_storages(test_data['storage'], test_data['idx_storage']) else: ALLOWED_HOSTS += ['testserver'] # Silent DEBUG output when running unit tests - LOGGING['handlers']['console']['level'] = 'INFO' + LOGGING['handlers']['console']['level'] = 'INFO' # type: ignore diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py index bb651049..8ffb740e 100644 --- a/swh/web/tests/data.py +++ b/swh/web/tests/data.py @@ -1,466 +1,467 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from copy import deepcopy import os import random +from copy import deepcopy +from typing import Dict from rest_framework.decorators import api_view from rest_framework.response import Response from swh.indexer.fossology_license import FossologyLicenseIndexer from swh.indexer.mimetype import MimetypeIndexer from swh.indexer.ctags import CtagsIndexer from swh.indexer.storage import get_indexer_storage from swh.model.from_disk import Directory from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS from swh.model.identifiers import directory_identifier from swh.loader.git.from_disk import GitLoaderFromArchive from swh.storage.algos.dir_iterators import dir_iterator from swh.web import config from swh.web.browse.utils import ( get_mimetype_and_encoding_for_content, prepare_content_for_display ) from swh.web.common import service from swh.web.common.highlightjs import get_hljs_language_from_filename # Module used to initialize data that will be provided as tests input # Configuration for git loader _TEST_LOADER_CONFIG = { 'storage': { 'cls': 'memory', 'args': {} }, 'send_contents': True, 'send_directories': True, 'send_revisions': True, 'send_releases': True, 'send_snapshot': True, 'content_size_limit': 100 * 1024 * 1024, 'content_packet_size': 10, 'content_packet_size_bytes': 100 * 1024 * 1024, 'directory_packet_size': 10, 'revision_packet_size': 10, 'release_packet_size': 10, 'save_data': False, } # Base content indexer configuration _TEST_INDEXER_BASE_CONFIG = { 'storage': { 'cls': 'memory', 'args': {}, }, 'objstorage': { 'cls': 'memory', 'args': {}, }, 'indexer_storage': { 'cls': 'memory', 'args': {}, } } def random_sha1(): return hash_to_hex(bytes(random.randint(0, 255) for _ in range(20))) def random_sha256(): return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32))) def random_blake2s256(): return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32))) def random_content(): return { 'sha1': random_sha1(), 'sha1_git': random_sha1(), 'sha256': random_sha256(), 'blake2s256': random_blake2s256(), } # MimetypeIndexer with custom configuration for tests class _MimetypeIndexer(MimetypeIndexer): def parse_config_file(self, *args, **kwargs): return { **_TEST_INDEXER_BASE_CONFIG, 'tools': { 'name': 'file', 'version': '1:5.30-1+deb9u1', 'configuration': { "type": "library", "debian-package": "python3-magic" } } } # FossologyLicenseIndexer with custom configuration for tests class _FossologyLicenseIndexer(FossologyLicenseIndexer): def parse_config_file(self, *args, **kwargs): return { **_TEST_INDEXER_BASE_CONFIG, 'workdir': '/tmp/swh/indexer.fossology.license', 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', 'configuration': { 'command_line': 'nomossa ', }, } } # CtagsIndexer with custom configuration for tests class _CtagsIndexer(CtagsIndexer): def parse_config_file(self, *args, **kwargs): return { **_TEST_INDEXER_BASE_CONFIG, 'workdir': '/tmp/swh/indexer.ctags', 'languages': {'c': 'c'}, 'tools': { 'name': 'universal-ctags', 'version': '~git7859817b', 'configuration': { 'command_line': '''ctags --fields=+lnz --sort=no --links=no ''' # noqa '''--output-format=json ''' }, } } # Lightweight git repositories that will be loaded to generate # input data for tests _TEST_ORIGINS = [ { 'type': 'git', 'url': 'https://github.com/wcoder/highlightjs-line-numbers.js', 'archives': ['highlightjs-line-numbers.js.zip', 'highlightjs-line-numbers.js_visit2.zip'], 'visit_date': ['Dec 1 2018, 01:00 UTC', 'Jan 20 2019, 15:00 UTC'] }, { 'type': 'git', 'url': 'https://github.com/memononen/libtess2', 'archives': ['libtess2.zip'], 'visit_date': ['May 25 2018, 01:00 UTC'] }, { 'type': 'git', 'url': 'repo_with_submodules', 'archives': ['repo_with_submodules.tgz'], 'visit_date': ['Jan 1 2019, 01:00 UTC'] } ] _contents = {} # Tests data initialization def _init_tests_data(): # Load git repositories from archives loader = GitLoaderFromArchive(config=_TEST_LOADER_CONFIG) # Get reference to the memory storage storage = loader.storage for origin in _TEST_ORIGINS: for i, archive in enumerate(origin['archives']): origin_repo_archive = \ os.path.join(os.path.dirname(__file__), 'resources/repos/%s' % archive) loader.load(origin['url'], origin_repo_archive, origin['visit_date'][i]) origin.update(storage.origin_get(origin)) # add an 'id' key if enabled contents = set() directories = set() revisions = set() releases = set() snapshots = set() content_path = {} # Get all objects loaded into the test archive for origin in _TEST_ORIGINS: snp = storage.snapshot_get_latest(origin['url']) snapshots.add(hash_to_hex(snp['id'])) for branch_name, branch_data in snp['branches'].items(): if branch_data['target_type'] == 'revision': revisions.add(branch_data['target']) elif branch_data['target_type'] == 'release': release = next(storage.release_get([branch_data['target']])) revisions.add(release['target']) releases.add(hash_to_hex(branch_data['target'])) for rev_log in storage.revision_shortlog(set(revisions)): rev_id = rev_log[0] revisions.add(rev_id) for rev in storage.revision_get(revisions): dir_id = rev['directory'] directories.add(hash_to_hex(dir_id)) for entry in dir_iterator(storage, dir_id): content_path[entry['sha1']] = '/'.join( [hash_to_hex(dir_id), entry['path'].decode('utf-8')]) if entry['type'] == 'file': contents.add(entry['sha1']) elif entry['type'] == 'dir': directories.add(hash_to_hex(entry['target'])) # Get all checksums for each content contents_metadata = storage.content_get_metadata(contents) contents = [] for content_metadata in contents_metadata: contents.append({ algo: hash_to_hex(content_metadata[algo]) for algo in DEFAULT_ALGORITHMS }) path = content_path[content_metadata['sha1']] cnt = next(storage.content_get([content_metadata['sha1']])) mimetype, encoding = get_mimetype_and_encoding_for_content(cnt['data']) content_display_data = prepare_content_for_display( cnt['data'], mimetype, path) contents[-1]['path'] = path contents[-1]['mimetype'] = mimetype contents[-1]['encoding'] = encoding contents[-1]['hljs_language'] = content_display_data['language'] contents[-1]['data'] = content_display_data['content_data'] _contents[contents[-1]['sha1']] = contents[-1] # Create indexer storage instance that will be shared by indexers idx_storage = get_indexer_storage('memory', {}) # Add the empty directory to the test archive empty_dir_id = directory_identifier({'entries': []}) empty_dir_id_bin = hash_to_bytes(empty_dir_id) storage.directory_add([{'id': empty_dir_id_bin, 'entries': []}]) # Return tests data return { 'storage': storage, 'idx_storage': idx_storage, 'origins': _TEST_ORIGINS, 'contents': contents, 'directories': list(directories), 'releases': list(releases), 'revisions': list(map(hash_to_hex, revisions)), 'snapshots': list(snapshots), 'generated_checksums': set(), } def _init_indexers(tests_data): # Instantiate content indexers that will be used in tests # and force them to use the memory storages indexers = {} for idx_name, idx_class in (('mimetype_indexer', _MimetypeIndexer), ('license_indexer', _FossologyLicenseIndexer), ('ctags_indexer', _CtagsIndexer)): idx = idx_class() idx.storage = tests_data['storage'] idx.objstorage = tests_data['storage'].objstorage idx.idx_storage = tests_data['idx_storage'] idx.register_tools(idx.config['tools']) indexers[idx_name] = idx return indexers def get_content(content_sha1): return _contents.get(content_sha1) _tests_data = None _current_tests_data = None _indexer_loggers = {} def get_tests_data(reset=False): """ Initialize tests data and return them in a dict. """ global _tests_data, _current_tests_data if _tests_data is None: _tests_data = _init_tests_data() indexers = _init_indexers(_tests_data) for (name, idx) in indexers.items(): # pytest makes the loggers use a temporary file; and deepcopy # requires serializability. So we remove them, and add them # back after the copy. _indexer_loggers[name] = idx.log del idx.log _tests_data.update(indexers) if reset or _current_tests_data is None: _current_tests_data = deepcopy(_tests_data) for (name, logger) in _indexer_loggers.items(): _current_tests_data[name].log = logger return _current_tests_data def override_storages(storage, idx_storage): """ Helper function to replace the storages from which archive data are fetched. """ swh_config = config.get_config() swh_config.update({'storage': storage}) service.storage = storage swh_config.update({'indexer_storage': idx_storage}) service.idx_storage = idx_storage # Implement some special endpoints used to provide input tests data # when executing end to end tests with cypress -_content_code_data_exts = {} -_content_code_data_filenames = {} -_content_other_data_exts = {} +_content_code_data_exts = {} # type: Dict[str, Dict[str, str]] +_content_code_data_filenames = {} # type: Dict[str, Dict[str, str]] +_content_other_data_exts = {} # type: Dict[str, Dict[str, str]] def _init_content_tests_data(data_path, data_dict, ext_key): """ Helper function to read the content of a directory, store it into a test archive and add some files metadata (sha1 and/or expected programming language) in a dict. Args: data_path (str): path to a directory relative to the tests folder of swh-web data_dict (dict): the dict that will store files metadata ext_key (bool): whether to use file extensions or filenames as dict keys """ test_contents_dir = os.path.join( os.path.dirname(__file__), data_path).encode('utf-8') directory = Directory.from_disk(path=test_contents_dir, data=True, save_path=True) objects = directory.collect() for c in objects['content'].values(): c['status'] = 'visible' sha1 = hash_to_hex(c['sha1']) if ext_key: key = c['path'].decode('utf-8').split('.')[-1] filename = 'test.' + key else: filename = c['path'].decode('utf-8').split('/')[-1] key = filename language = get_hljs_language_from_filename(filename) data_dict[key] = {'sha1': sha1, 'language': language} del c['path'] del c['perms'] storage = get_tests_data()['storage'] storage.content_add(objects['content'].values()) def _init_content_code_data_exts(): """ Fill a global dictionary which maps source file extension to a code content example. """ global _content_code_data_exts _init_content_tests_data('resources/contents/code/extensions', _content_code_data_exts, True) def _init_content_other_data_exts(): """ Fill a global dictionary which maps a file extension to a content example. """ global _content_other_data_exts _init_content_tests_data('resources/contents/other/extensions', _content_other_data_exts, True) def _init_content_code_data_filenames(): """ Fill a global dictionary which maps a filename to a content example. """ global _content_code_data_filenames _init_content_tests_data('resources/contents/code/filenames', _content_code_data_filenames, False) if config.get_config()['e2e_tests_mode']: _init_content_code_data_exts() _init_content_other_data_exts() _init_content_code_data_filenames() @api_view(['GET']) def get_content_code_data_all_exts(request): """ Endpoint implementation returning a list of all source file extensions to test for highlighting using cypress. """ return Response(sorted(_content_code_data_exts.keys()), status=200, content_type='application/json') @api_view(['GET']) def get_content_code_data_by_ext(request, ext): """ Endpoint implementation returning metadata of a code content example based on the source file extension. """ data = None status = 404 if ext in _content_code_data_exts: data = _content_code_data_exts[ext] status = 200 return Response(data, status=status, content_type='application/json') @api_view(['GET']) def get_content_other_data_by_ext(request, ext): """ Endpoint implementation returning metadata of a content example based on the file extension. """ _init_content_other_data_exts() data = None status = 404 if ext in _content_other_data_exts: data = _content_other_data_exts[ext] status = 200 return Response(data, status=status, content_type='application/json') @api_view(['GET']) def get_content_code_data_all_filenames(request): """ Endpoint implementation returning a list of all source filenames to test for highlighting using cypress. """ return Response(sorted(_content_code_data_filenames.keys()), status=200, content_type='application/json') @api_view(['GET']) def get_content_code_data_by_filename(request, filename): """ Endpoint implementation returning metadata of a code content example based on the source filename. """ data = None status = 404 if filename in _content_code_data_filenames: data = _content_code_data_filenames[filename] status = 200 return Response(data, status=status, content_type='application/json') diff --git a/tox.ini b/tox.ini index a2aec2e1..0c360be4 100644 --- a/tox.ini +++ b/tox.ini @@ -1,26 +1,35 @@ [tox] -envlist=flake8,py3 +envlist=flake8,mypy,py3 [testenv:py3] deps = .[testing] pytest-cov pytest-django commands = pytest --hypothesis-profile=swh-web-fast --cov {envsitepackagesdir}/swh/web --cov-branch {posargs} {envsitepackagesdir}/swh/web [testenv:py3-slow] deps = .[testing] pytest-cov pytest-django commands = pytest --hypothesis-profile=swh-web --cov {envsitepackagesdir}/swh/web --cov-branch {posargs} {envsitepackagesdir}/swh/web [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 \ --exclude=.tox,.git,__pycache__,.eggs,*.egg,node_modules + +[testenv:mypy] +setenv = DJANGO_SETTINGS_MODULE = swh.web.settings.development +skip_install = true +deps = + mypy + .[testing] +commands = + mypy swh