diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,6 +37,11 @@ language: system types: [javascript] +- repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black + # unfortunately, we are far from being able to enable this... # - repo: https://github.com/PyCQA/pydocstyle.git # rev: 4.0.0 @@ -47,15 +52,3 @@ # entry: pydocstyle --convention=google # language: python # types: [python] - -# black requires py3.6+ -#- repo: https://github.com/python/black -# rev: 19.3b0 -# hooks: -# - id: black -# language_version: python3 -#- repo: https://github.com/asottile/blacken-docs -# rev: v1.0.0-1 -# hooks: -# - id: blacken-docs -# additional_dependencies: [black==19.3b0] diff --git a/docs/conf.py b/docs/conf.py --- a/docs/conf.py +++ b/docs/conf.py @@ -3,13 +3,14 @@ import django import os -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'swh.web.settings.development') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "swh.web.settings.development") django.setup() import swh.docs.sphinx.conf as sphinx_conf from swh.web.doc_config import customize_sphinx_conf + customize_sphinx_conf(sphinx_conf) from swh.docs.sphinx.conf import * diff --git a/setup.cfg b/setup.cfg --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,10 @@ [radon] exclude = swh/web/tests/resources/* + +[flake8] +# E203: whitespaces before ':' +# E231: missing whitespace after ',' +# W503: line break before binary operator +ignore = E203,E231,W503 +max-line-length = 88 + diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -14,15 +14,15 @@ here = path.abspath(path.dirname(__file__)) # Get the long description from the README file -with open(path.join(here, 'README.md'), encoding='utf-8') as f: +with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: - reqf = 'requirements-%s.txt' % name + reqf = "requirements-%s.txt" % name else: - reqf = 'requirements.txt' + reqf = "requirements.txt" requirements = [] if not path.exists(reqf): @@ -31,7 +31,7 @@ with open(reqf) as f: for line in f.readlines(): line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): continue requirements.append(line) return requirements @@ -39,23 +39,23 @@ # package generated static assets as module data files data_files = [] -for root, _, files in os.walk('static/'): +for root, _, files in os.walk("static/"): root_files = [os.path.join(root, i) for i in files] - data_files.append((os.path.join('share/swh/web', root), root_files)) + data_files.append((os.path.join("share/swh/web", root), root_files)) setup( - name='swh.web', - description='Software Heritage Web UI', + name="swh.web", + description="Software Heritage Web UI", long_description=long_description, - long_description_content_type='text/markdown', - author='Software Heritage developers', - author_email='swh-devel@inria.fr', - url='https://forge.softwareheritage.org/diffusion/DWUI/', + long_description_content_type="text/markdown", + author="Software Heritage developers", + author_email="swh-devel@inria.fr", + url="https://forge.softwareheritage.org/diffusion/DWUI/", packages=find_packages(), scripts=[], - install_requires=parse_requirements() + parse_requirements('swh'), - setup_requires=['vcversioner'], - extras_require={'testing': parse_requirements('test')}, + install_requires=parse_requirements() + parse_requirements("swh"), + setup_requires=["vcversioner"], + extras_require={"testing": 
parse_requirements("test")}, vcversioner={}, include_package_data=True, classifiers=[ @@ -67,9 +67,9 @@ "Framework :: Django", ], project_urls={ - 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', - 'Funding': 'https://www.softwareheritage.org/donate', - 'Source': 'https://forge.softwareheritage.org/source/swh-web', + "Bug Reports": "https://forge.softwareheritage.org/maniphest", + "Funding": "https://www.softwareheritage.org/donate", + "Source": "https://forge.softwareheritage.org/source/swh-web", }, - data_files=data_files + data_files=data_files, ) diff --git a/swh/web/admin/adminurls.py b/swh/web/admin/adminurls.py --- a/swh/web/admin/adminurls.py +++ b/swh/web/admin/adminurls.py @@ -11,7 +11,7 @@ Class to manage swh-web admin urls. """ - scope = 'admin' + scope = "admin" def admin_route(*url_patterns, view_name=None): @@ -24,7 +24,7 @@ view_name: the name of the Django view associated to the routes used to reverse the url """ - url_patterns = ['^' + url_pattern + '$' for url_pattern in url_patterns] + url_patterns = ["^" + url_pattern + "$" for url_pattern in url_patterns] def decorator(f): # register the route and its view in the browse endpoints index diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py --- a/swh/web/admin/deposit.py +++ b/swh/web/admin/deposit.py @@ -19,77 +19,85 @@ from swh.web.admin.adminurls import admin_route from swh.web.config import get_config -config = get_config()['deposit'] +config = get_config()["deposit"] -@admin_route(r'deposit/', view_name='admin-deposit') +@admin_route(r"deposit/", view_name="admin-deposit") @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save(request): - return render(request, 'admin/deposit.html') + return render(request, "admin/deposit.html") -@admin_route(r'deposit/list/', - view_name='admin-deposit-list') +@admin_route(r"deposit/list/", view_name="admin-deposit-list") @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_deposit_list(request): table_data = {} - table_data['draw'] = int(request.GET['draw']) - deposits_list_url = config['private_api_url'] + 'deposits' - deposits_list_auth = HTTPBasicAuth(config['private_api_user'], - config['private_api_password']) + table_data["draw"] = int(request.GET["draw"]) + deposits_list_url = config["private_api_url"] + "deposits" + deposits_list_auth = HTTPBasicAuth( + config["private_api_user"], config["private_api_password"] + ) try: - nb_deposits = requests.get('%s?page_size=1' % deposits_list_url, - auth=deposits_list_auth, - timeout=30).json()['count'] - - deposits_data = cache.get('swh-deposit-list') - if not deposits_data or deposits_data['count'] != nb_deposits: - deposits_data = requests.get('%s?page_size=%s' % - (deposits_list_url, nb_deposits), - auth=deposits_list_auth, - timeout=30).json() - cache.set('swh-deposit-list', deposits_data) - - deposits = deposits_data['results'] - - search_value = request.GET['search[value]'] + nb_deposits = requests.get( + "%s?page_size=1" % deposits_list_url, auth=deposits_list_auth, timeout=30 + ).json()["count"] + + deposits_data = cache.get("swh-deposit-list") + if not deposits_data or deposits_data["count"] != nb_deposits: + deposits_data = requests.get( + "%s?page_size=%s" % (deposits_list_url, nb_deposits), + auth=deposits_list_auth, + timeout=30, + ).json() + cache.set("swh-deposit-list", deposits_data) + + deposits = deposits_data["results"] + + search_value = request.GET["search[value]"] if search_value: - deposits = \ - [d for d in deposits - if 
any(search_value.lower() in val - for val in [str(v).lower() for v in d.values()])] - - column_order = request.GET['order[0][column]'] - field_order = request.GET['columns[%s][name]' % column_order] - order_dir = request.GET['order[0][dir]'] - - deposits = sorted(deposits, key=lambda d: d[field_order] or '') - if order_dir == 'desc': + deposits = [ + d + for d in deposits + if any( + search_value.lower() in val + for val in [str(v).lower() for v in d.values()] + ) + ] + + column_order = request.GET["order[0][column]"] + field_order = request.GET["columns[%s][name]" % column_order] + order_dir = request.GET["order[0][dir]"] + + deposits = sorted(deposits, key=lambda d: d[field_order] or "") + if order_dir == "desc": deposits = list(reversed(deposits)) - length = int(request.GET['length']) - page = int(request.GET['start']) / length + 1 + length = int(request.GET["length"]) + page = int(request.GET["start"]) / length + 1 paginator = Paginator(deposits, length) data = paginator.page(page).object_list - table_data['recordsTotal'] = deposits_data['count'] - table_data['recordsFiltered'] = len(deposits) - table_data['data'] = [{ - 'id': d['id'], - 'external_id': d['external_id'], - 'reception_date': d['reception_date'], - 'status': d['status'], - 'status_detail': d['status_detail'], - 'swh_anchor_id': d['swh_anchor_id'], - 'swh_anchor_id_context': d['swh_anchor_id_context'], - 'swh_id': d['swh_id'], - 'swh_id_context': d['swh_id_context'] - } for d in data] + table_data["recordsTotal"] = deposits_data["count"] + table_data["recordsFiltered"] = len(deposits) + table_data["data"] = [ + { + "id": d["id"], + "external_id": d["external_id"], + "reception_date": d["reception_date"], + "status": d["status"], + "status_detail": d["status_detail"], + "swh_anchor_id": d["swh_anchor_id"], + "swh_anchor_id_context": d["swh_anchor_id_context"], + "swh_id": d["swh_id"], + "swh_id_context": d["swh_id_context"], + } + for d in data + ] except Exception as exc: sentry_sdk.capture_exception(exc) - table_data['error'] = ('An error occurred while retrieving ' - 'the list of deposits !') + table_data["error"] = ( + "An error occurred while retrieving " "the list of deposits !" 
+ ) - return HttpResponse(json.dumps(table_data), - content_type='application/json') + return HttpResponse(json.dumps(table_data), content_type="application/json") diff --git a/swh/web/admin/origin_save.py b/swh/web/admin/origin_save.py --- a/swh/web/admin/origin_save.py +++ b/swh/web/admin/origin_save.py @@ -16,57 +16,65 @@ from swh.web.admin.adminurls import admin_route from swh.web.common.models import ( - SaveAuthorizedOrigin, SaveUnauthorizedOrigin, SaveOriginRequest + SaveAuthorizedOrigin, + SaveUnauthorizedOrigin, + SaveOriginRequest, ) from swh.web.common.origin_save import ( - create_save_origin_request, get_save_origin_task_info, - SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED + create_save_origin_request, + get_save_origin_task_info, + SAVE_REQUEST_PENDING, + SAVE_REQUEST_REJECTED, ) -@admin_route(r'origin/save/', view_name='admin-origin-save') +@admin_route(r"origin/save/", view_name="admin-origin-save") @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save(request): - return render(request, 'admin/origin-save.html') + return render(request, "admin/origin-save.html") def _datatables_origin_urls_response(request, urls_query_set): - search_value = request.GET['search[value]'] + search_value = request.GET["search[value]"] if search_value: urls_query_set = urls_query_set.filter(url__icontains=search_value) - column_order = request.GET['order[0][column]'] - field_order = request.GET['columns[%s][name]' % column_order] - order_dir = request.GET['order[0][dir]'] - if order_dir == 'desc': - field_order = '-' + field_order + column_order = request.GET["order[0][column]"] + field_order = request.GET["columns[%s][name]" % column_order] + order_dir = request.GET["order[0][dir]"] + if order_dir == "desc": + field_order = "-" + field_order urls_query_set = urls_query_set.order_by(field_order) table_data = {} - table_data['draw'] = int(request.GET['draw']) - table_data['recordsTotal'] = urls_query_set.count() - table_data['recordsFiltered'] = urls_query_set.count() - length = int(request.GET['length']) - page = int(request.GET['start']) / length + 1 + table_data["draw"] = int(request.GET["draw"]) + table_data["recordsTotal"] = urls_query_set.count() + table_data["recordsFiltered"] = urls_query_set.count() + length = int(request.GET["length"]) + page = int(request.GET["start"]) / length + 1 paginator = Paginator(urls_query_set, length) urls_query_set = paginator.page(page).object_list - table_data['data'] = [{'url': u.url} for u in urls_query_set] - table_data_json = json.dumps(table_data, separators=(',', ': ')) - return HttpResponse(table_data_json, content_type='application/json') + table_data["data"] = [{"url": u.url} for u in urls_query_set] + table_data_json = json.dumps(table_data, separators=(",", ": ")) + return HttpResponse(table_data_json, content_type="application/json") -@admin_route(r'origin/save/authorized_urls/list/', - view_name='admin-origin-save-authorized-urls-list') +@admin_route( + r"origin/save/authorized_urls/list/", + view_name="admin-origin-save-authorized-urls-list", +) @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_authorized_urls_list(request): authorized_urls = SaveAuthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, authorized_urls) -@admin_route(r'origin/save/authorized_urls/add/(?P<origin_url>.+)/', - view_name='admin-origin-save-add-authorized-url') +@admin_route( + r"origin/save/authorized_urls/add/(?P<origin_url>.+)/", + view_name="admin-origin-save-add-authorized-url", +)
@require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_add_authorized_url(request, origin_url): @@ -76,9 +84,9 @@ # add the new authorized url SaveAuthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist - pending_save_requests = \ - SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, - status=SAVE_REQUEST_PENDING) + pending_save_requests = SaveOriginRequest.objects.filter( + origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING + ) # create origin save tasks for previously pending requests for psr in pending_save_requests: create_save_origin_request(psr.visit_type, psr.origin_url) @@ -88,8 +96,10 @@ return HttpResponse(status=status_code) -@admin_route(r'origin/save/authorized_urls/remove/(?P<origin_url>.+)/', - view_name='admin-origin-save-remove-authorized-url') +@admin_route( + r"origin/save/authorized_urls/remove/(?P<origin_url>.+)/", + view_name="admin-origin-save-remove-authorized-url", +) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_remove_authorized_url(request, origin_url): @@ -103,16 +113,20 @@ return HttpResponse(status=status_code) -@admin_route(r'origin/save/unauthorized_urls/list/', - view_name='admin-origin-save-unauthorized-urls-list') +@admin_route( + r"origin/save/unauthorized_urls/list/", + view_name="admin-origin-save-unauthorized-urls-list", +) @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_unauthorized_urls_list(request): unauthorized_urls = SaveUnauthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, unauthorized_urls) -@admin_route(r'origin/save/unauthorized_urls/add/(?P<origin_url>.+)/', - view_name='admin-origin-save-add-unauthorized-url') +@admin_route( + r"origin/save/unauthorized_urls/add/(?P<origin_url>.+)/", + view_name="admin-origin-save-add-unauthorized-url", +) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_add_unauthorized_url(request, origin_url): @@ -121,9 +135,9 @@ except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist - pending_save_requests = \ - SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, - status=SAVE_REQUEST_PENDING) + pending_save_requests = SaveOriginRequest.objects.filter( + origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING + ) # mark pending requests as rejected for psr in pending_save_requests: psr.status = SAVE_REQUEST_REJECTED @@ -134,8 +148,10 @@ return HttpResponse(status=status_code) -@admin_route(r'origin/save/unauthorized_urls/remove/(?P<origin_url>.+)/', - view_name='admin-origin-save-remove-unauthorized-url') +@admin_route( + r"origin/save/unauthorized_urls/remove/(?P<origin_url>.+)/", + view_name="admin-origin-save-remove-unauthorized-url", +) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_remove_unauthorized_url(request, origin_url): @@ -149,8 +165,10 @@ return HttpResponse(status=status_code) -@admin_route(r'origin/save/request/accept/(?P<visit_type>.+)/url/(?P<origin_url>.+)/', # noqa - view_name='admin-origin-save-request-accept') +@admin_route( + r"origin/save/request/accept/(?P<visit_type>.+)/url/(?P<origin_url>.+)/", + view_name="admin-origin-save-request-accept", +) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_accept(request, visit_type, origin_url): @@ -162,8 +180,10 @@ return
HttpResponse(status=200) -@admin_route(r'origin/save/request/reject/(?P<visit_type>.+)/url/(?P<origin_url>.+)/', # noqa - view_name='admin-origin-save-request-reject') +@admin_route( + r"origin/save/request/reject/(?P<visit_type>.+)/url/(?P<origin_url>.+)/", + view_name="admin-origin-save-request-reject", +) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_reject(request, visit_type, origin_url): @@ -171,16 +191,18 @@ SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) - sor = SaveOriginRequest.objects.get(visit_type=visit_type, - origin_url=origin_url, - status=SAVE_REQUEST_PENDING) + sor = SaveOriginRequest.objects.get( + visit_type=visit_type, origin_url=origin_url, status=SAVE_REQUEST_PENDING + ) sor.status = SAVE_REQUEST_REJECTED sor.save() return HttpResponse(status=200) -@admin_route(r'origin/save/request/remove/(?P<sor_id>.+)/', - view_name='admin-origin-save-request-remove') +@admin_route( + r"origin/save/request/remove/(?P<sor_id>.+)/", + view_name="admin-origin-save-request-remove", +) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_remove(request, sor_id): @@ -194,13 +216,14 @@ return HttpResponse(status=status_code) -@admin_route(r'origin/save/task/info/(?P<save_request_id>.+)/', - view_name='admin-origin-save-task-info') +@admin_route( + r"origin/save/task/info/(?P<save_request_id>.+)/", + view_name="admin-origin-save-task-info", +) @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _save_origin_task_info(request, save_request_id): request_info = get_save_origin_task_info(save_request_id) - for date_field in ('scheduled', 'started', 'ended'): + for date_field in ("scheduled", "started", "ended"): if date_field in request_info and request_info[date_field] is not None: request_info[date_field] = request_info[date_field].isoformat() - return HttpResponse(json.dumps(request_info), - content_type='application/json') + return HttpResponse(json.dumps(request_info), content_type="application/json") diff --git a/swh/web/admin/urls.py b/swh/web/admin/urls.py --- a/swh/web/admin/urls.py +++ b/swh/web/admin/urls.py @@ -9,19 +9,17 @@ from swh.web.admin.adminurls import AdminUrls -import swh.web.admin.origin_save # noqa -import swh.web.admin.deposit # noqa +import swh.web.admin.origin_save # noqa +import swh.web.admin.deposit # noqa def _admin_default_view(request): - return redirect('admin-origin-save') + return redirect("admin-origin-save") urlpatterns = [ - url(r'^$', _admin_default_view, name='admin'), - url(r'^login/$', - LoginView.as_view(template_name='login.html'), - name='login'), + url(r"^$", _admin_default_view, name="admin"), + url(r"^login/$", LoginView.as_view(template_name="login.html"), name="login"), ] urlpatterns += AdminUrls.get_url_patterns() diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -32,23 +32,23 @@ """ # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) - parameter_roles = ('param', 'parameter', 'arg', 'argument') + parameter_roles = ("param", "parameter", "arg", "argument") - request_json_object_roles = ('reqjsonobj', 'reqjson', '<jsonobj', '<json') + request_json_object_roles = ("reqjsonobj", "reqjson", "<jsonobj", "<json") - request_json_array_roles = ('reqjsonarr', '<jsonarr') + request_json_array_roles = ("reqjsonarr", "<jsonarr") - response_json_object_roles = ('resjsonobj', 'resjson', '>jsonobj', '>json') + response_json_object_roles = ("resjsonobj", "resjson", ">jsonobj", ">json") - response_json_array_roles = ('resjsonarr', '>jsonarr') + response_json_array_roles = ("resjsonarr", ">jsonarr") - query_parameter_roles = ('queryparameter', 'queryparam', 'qparam', 'query') + query_parameter_roles
= ("queryparameter", "queryparam", "qparam", "query") - request_header_roles = ('header', 'resheader', 'responseheader') + response_header_roles = (">header", "resheader", "responseheader") - status_code_roles = ('statuscode', 'status', 'code') + status_code_roles = ("statuscode", "status", "code") def __init__(self, document, data): super().__init__(document) @@ -70,33 +70,36 @@ paragraph into a valid raw rst string (as the apidoc documentation transform rst to html when rendering). """ - par = par.replace('\n', ' ') + par = par.replace("\n", " ") # keep emphasized, strong and literal text - par = par.replace('', '*') - par = par.replace('', '*') - par = par.replace('', '**') - par = par.replace('', '**') - par = par.replace('', '``') - par = par.replace('', '``') + par = par.replace("", "*") + par = par.replace("", "*") + par = par.replace("", "**") + par = par.replace("", "**") + par = par.replace("", "``") + par = par.replace("", "``") # keep links to web pages - if '', - r'`\1 <\2>`_', par) + if "', + r"`\1 <\2>`_", + par, + ) # remove parsed document markups but keep rst links - par = re.sub(r'<[^<]+?>(?!`_)', '', par) + par = re.sub(r"<[^<]+?>(?!`_)", "", par) # api urls cleanup to generate valid links afterwards subs_made = 1 while subs_made: - (par, subs_made) = re.subn(r'(:http:.*)(\(\w+\))', r'\1', par) + (par, subs_made) = re.subn(r"(:http:.*)(\(\w+\))", r"\1", par) subs_made = 1 while subs_made: - (par, subs_made) = re.subn(r'(:http:.*)(\[.*\])', r'\1', par) - par = par.replace('//', '/') + (par, subs_made) = re.subn(r"(:http:.*)(\[.*\])", r"\1", par) + par = par.replace("//", "/") # transform references to api endpoints doc into valid rst links - par = re.sub(':http:get:`([^,`]*)`', r'`\1 <\1doc/>`_', par) + par = re.sub(":http:get:`([^,`]*)`", r"`\1 <\1doc/>`_", par) # transform references to some elements into bold text - par = re.sub(':http:header:`(.*)`', r'**\1**', par) - par = re.sub(':func:`(.*)`', r'**\1**', par) + par = re.sub(":http:header:`(.*)`", r"**\1**", par) + par = re.sub(":func:`(.*)`", r"**\1**", par) return par def visit_field_list(self, node): @@ -112,75 +115,82 @@ # parse field text elif isinstance(child, docutils.nodes.paragraph): text = self.process_paragraph(str(child)) - field_data = field_name.split(' ') + field_data = field_name.split(" ") # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: - self.data['args'].append({'name': field_data[2], - 'type': field_data[1], - 'doc': text}) + self.data["args"].append( + {"name": field_data[2], "type": field_data[1], "doc": text} + ) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: - self.data['params'].append({'name': field_data[2], - 'type': field_data[1], - 'doc': text}) + self.data["params"].append( + {"name": field_data[2], "type": field_data[1], "doc": text} + ) self.params_set.add(field_data[2]) # Request data type - if (field_data[0] in self.request_json_array_roles or - field_data[0] in self.request_json_object_roles): + if ( + field_data[0] in self.request_json_array_roles + or field_data[0] in self.request_json_object_roles + ): # array if field_data[0] in self.request_json_array_roles: - self.data['input_type'] = 'array' + self.data["input_type"] = "array" # object else: - self.data['input_type'] = 'object' + self.data["input_type"] = "object" # input object field if field_data[2] not in self.inputs_set: - self.data['inputs'].append({'name': field_data[2], - 'type': 
field_data[1], - 'doc': text}) + self.data["inputs"].append( + {"name": field_data[2], "type": field_data[1], "doc": text} + ) self.inputs_set.add(field_data[2]) - self.current_json_obj = self.data['inputs'][-1] + self.current_json_obj = self.data["inputs"][-1] # Response type - if (field_data[0] in self.response_json_array_roles or - field_data[0] in self.response_json_object_roles): + if ( + field_data[0] in self.response_json_array_roles + or field_data[0] in self.response_json_object_roles + ): # array if field_data[0] in self.response_json_array_roles: - self.data['return_type'] = 'array' + self.data["return_type"] = "array" # object else: - self.data['return_type'] = 'object' + self.data["return_type"] = "object" # returned object field if field_data[2] not in self.returns_set: - self.data['returns'].append({'name': field_data[2], - 'type': field_data[1], - 'doc': text}) + self.data["returns"].append( + {"name": field_data[2], "type": field_data[1], "doc": text} + ) self.returns_set.add(field_data[2]) - self.current_json_obj = self.data['returns'][-1] + self.current_json_obj = self.data["returns"][-1] # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: - self.data['status_codes'].append({'code': field_data[1], # noqa - 'doc': text}) + self.data["status_codes"].append( + {"code": field_data[1], "doc": text} + ) self.status_codes_set.add(field_data[1]) # Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: - self.data['reqheaders'].append({'name': field_data[1], - 'doc': text}) + self.data["reqheaders"].append( + {"name": field_data[1], "doc": text} + ) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: - resheader = {'name': field_data[1], - 'doc': text} - self.data['resheaders'].append(resheader) + resheader = {"name": field_data[1], "doc": text} + self.data["resheaders"].append(resheader) self.resheaders_set.add(field_data[1]) - if resheader['name'] == 'Content-Type' and \ - resheader['doc'] == 'application/octet-stream': - self.data['return_type'] = 'octet stream' + if ( + resheader["name"] == "Content-Type" + and resheader["doc"] == "application/octet-stream" + ): + self.data["return_type"] = "octet stream" def visit_paragraph(self, node): """ @@ -190,10 +200,9 @@ if isinstance(node.parent, docutils.nodes.block_quote): text = self.process_paragraph(str(node)) # endpoint description - if (not text.startswith('**') and - text not in self.data['description']): - self.data['description'] += '\n\n' if self.data['description'] else '' # noqa - self.data['description'] += text + if not text.startswith("**") and text not in self.data["description"]: + self.data["description"] += "\n\n" if self.data["description"] else "" + self.data["description"] += text def visit_literal_block(self, node): """ @@ -202,36 +211,34 @@ text = node.astext() # literal block in endpoint description if not self.field_list_visited: - self.data['description'] += \ - ':\n\n%s\n' % textwrap.indent(text, '\t') + self.data["description"] += ":\n\n%s\n" % textwrap.indent(text, "\t") # extract example url - if ':swh_web_api:' in text: - self.data['examples'].append( - '/api/1/' + re.sub('.*`(.*)`.*', r'\1', text)) + if ":swh_web_api:" in text: + self.data["examples"].append("/api/1/" + re.sub(".*`(.*)`.*", r"\1", text)) def visit_bullet_list(self, node): # bullet list in endpoint description if not 
self.field_list_visited: - self.data['description'] += '\n\n' + self.data["description"] += "\n\n" for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) - self.data['description'] += '\t* %s\n' % line_text + self.data["description"] += "\t* %s\n" % line_text elif self.current_json_obj: - self.current_json_obj['doc'] += '\n\n' + self.current_json_obj["doc"] += "\n\n" for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) - self.current_json_obj['doc'] += '\t\t* %s\n' % line_text + self.current_json_obj["doc"] += "\t\t* %s\n" % line_text self.current_json_obj = None def visit_warning(self, node): text = self.process_paragraph(str(node)) - rst_warning = '\n\n.. warning::\n%s\n' % textwrap.indent(text, '\t') - if rst_warning not in self.data['description']: - self.data['description'] += rst_warning + rst_warning = "\n\n.. warning::\n%s\n" % textwrap.indent(text, "\t") + if rst_warning not in self.data["description"]: + self.data["description"] += rst_warning def unknown_visit(self, node): pass @@ -241,30 +248,29 @@ def _parse_httpdomain_doc(doc, data): - doc_lines = doc.split('\n') + doc_lines = doc.split("\n") doc_lines_filtered = [] urls = defaultdict(list) - default_http_methods = ['HEAD', 'OPTIONS'] + default_http_methods = ["HEAD", "OPTIONS"] # httpdomain is a sphinx extension that is unknown to docutils but # fortunately we can still parse its directives' content, # so remove lines with httpdomain directives before executing the # rst parser from docutils for doc_line in doc_lines: - if '.. http' not in doc_line: + if ".. http" not in doc_line: doc_lines_filtered.append(doc_line) else: - url = doc_line[doc_line.find('/'):] + url = doc_line[doc_line.find("/") :] # emphasize url arguments for html rendering - url = re.sub(r'\((\w+)\)', r' **\(\1\)** ', url) - method = re.search(r'http:(\w+)::', doc_line).group(1) + url = re.sub(r"\((\w+)\)", r" **\(\1\)** ", url) + method = re.search(r"http:(\w+)::", doc_line).group(1) urls[url].append(method.upper()) for url, methods in urls.items(): - data['urls'].append({'rule': url, - 'methods': methods + default_http_methods}) + data["urls"].append({"rule": url, "methods": methods + default_http_methods}) # parse the rst docstring and do not print system messages about # unknown httpdomain roles - document = parse_rst('\n'.join(doc_lines_filtered), report_level=5) + document = parse_rst("\n".join(doc_lines_filtered), report_level=5) # remove the system_message nodes from the parsed document for node in document.traverse(docutils.nodes.system_message): node.parent.remove(node) @@ -279,9 +285,14 @@ """ -def api_doc(route: str, noargs: bool = False, need_params: bool = False, - tags: List[str] = [], handle_response: bool = False, - api_version: str = '1'): +def api_doc( + route: str, + noargs: bool = False, + need_params: bool = False, + tags: List[str] = [], + handle_response: bool = False, + api_version: str = "1", +): """ Decorator for an API endpoint implementation used to generate a dedicated view displaying its HTML documentation. 
@@ -314,25 +325,29 @@ # @api_doc() Decorator call def decorator(f): # if the route is not hidden, add it to the index - if 'hidden' not in tags_set: + if "hidden" not in tags_set: doc_data = get_doc_data(f, route, noargs) - doc_desc = doc_data['description'] - first_dot_pos = doc_desc.find('.') - APIUrls.add_doc_route(route, doc_desc[:first_dot_pos+1], - noargs=noargs, api_version=api_version, - tags=tags_set) + doc_desc = doc_data["description"] + first_dot_pos = doc_desc.find(".") + APIUrls.add_doc_route( + route, + doc_desc[: first_dot_pos + 1], + noargs=noargs, + api_version=api_version, + tags=tags_set, + ) # create a dedicated view to display endpoint HTML doc - @api_view(['GET', 'HEAD']) + @api_view(["GET", "HEAD"]) @wraps(f) def doc_view(request): doc_data = get_doc_data(f, route, noargs) return make_api_response(request, None, doc_data) - route_name = '%s-doc' % route[1:-1].replace('/', '-') - urlpattern = f'^{api_version}{route}doc/$' + route_name = "%s-doc" % route[1:-1].replace("/", "-") + urlpattern = f"^{api_version}{route}doc/$" - view_name = 'api-%s-%s' % (api_version, route_name) + view_name = "api-%s-%s" % (api_version, route_name) APIUrls.add_url_pattern(urlpattern, doc_view, view_name) @wraps(f) @@ -343,8 +358,11 @@ response = f(request, **kwargs) except Exception as exc: sentry_sdk.capture_exception(exc) - if request.accepted_media_type == 'text/html' and \ - need_params and not request.query_params: + if ( + request.accepted_media_type == "text/html" + and need_params + and not request.query_params + ): response = None else: return error_response(request, exc, doc_data) @@ -365,77 +383,83 @@ Build documentation data for the decorated api endpoint function """ data = { - 'description': '', - 'response_data': None, - 'urls': [], - 'args': [], - 'params': [], - 'input_type': '', - 'inputs': [], - 'resheaders': [], - 'reqheaders': [], - 'return_type': '', - 'returns': [], - 'status_codes': [], - 'examples': [], - 'route': route, - 'noargs': noargs + "description": "", + "response_data": None, + "urls": [], + "args": [], + "params": [], + "input_type": "", + "inputs": [], + "resheaders": [], + "reqheaders": [], + "return_type": "", + "returns": [], + "status_codes": [], + "examples": [], + "route": route, + "noargs": noargs, } if not f.__doc__: - raise APIDocException('apidoc: expected a docstring' - ' for function %s' - % (f.__name__,)) + raise APIDocException( + "apidoc: expected a docstring" " for function %s" % (f.__name__,) + ) # use raw docstring as endpoint documentation if sphinx # httpdomain is not used - if '.. http' not in f.__doc__: - data['description'] = f.__doc__ + if ".. 
http" not in f.__doc__: + data["description"] = f.__doc__ # else parse the sphinx httpdomain docstring with docutils # (except when building the swh-web documentation through autodoc # sphinx extension, not needed and raise errors with sphinx >= 1.7) - elif 'SWH_WEB_DOC_BUILD' not in os.environ: + elif "SWH_WEB_DOC_BUILD" not in os.environ: _parse_httpdomain_doc(f.__doc__, data) # process input/returned object info for nicer html display - inputs_list = '' - returns_list = '' - for inp in data['inputs']: + inputs_list = "" + returns_list = "" + for inp in data["inputs"]: # special case for array of non object type, for instance # :jsonarr string -: an array of string - if ret['name'] != '-': - returns_list += ('\t* **%s (%s)**: %s\n' % - (ret['name'], ret['type'], ret['doc'])) - data['inputs_list'] = inputs_list - data['returns_list'] = returns_list + if ret["name"] != "-": + returns_list += "\t* **%s (%s)**: %s\n" % ( + ret["name"], + ret["type"], + ret["doc"], + ) + data["inputs_list"] = inputs_list + data["returns_list"] = returns_list return data -DOC_COMMON_HEADERS = ''' +DOC_COMMON_HEADERS = """ :reqheader Accept: the requested response content type, either ``application/json`` (default) or ``application/yaml`` :resheader Content-Type: this depends on :http:header:`Accept` - header of request''' -DOC_RESHEADER_LINK = ''' + header of request""" +DOC_RESHEADER_LINK = """ :resheader Link: indicates that a subsequent result page is available and contains the url pointing to it -''' +""" DEFAULT_SUBSTITUTIONS = { - 'common_headers': DOC_COMMON_HEADERS, - 'resheader_link': DOC_RESHEADER_LINK, + "common_headers": DOC_COMMON_HEADERS, + "resheader_link": DOC_RESHEADER_LINK, } def format_docstring(**substitutions): def decorator(f): - f.__doc__ = f.__doc__.format(**{ - **DEFAULT_SUBSTITUTIONS, **substitutions}) + f.__doc__ = f.__doc__.format(**{**DEFAULT_SUBSTITUTIONS, **substitutions}) return f + return decorator diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -13,10 +13,7 @@ from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.api import utils -from swh.web.common.exc import ( - NotFoundExc, ForbiddenExc, - BadInputExc, LargePayloadExc -) +from swh.web.common.exc import NotFoundExc, ForbiddenExc, BadInputExc, LargePayloadExc from swh.web.common.utils import shorten_path, gen_path_info from swh.web.config import get_config @@ -40,22 +37,20 @@ """ link_headers = [] - if 'headers' not in rv: + if "headers" not in rv: return {} - rv_headers = rv['headers'] + rv_headers = rv["headers"] - if 'link-next' in rv_headers: - link_headers.append('<%s>; rel="next"' % rv_headers['link-next']) - if 'link-prev' in rv_headers: - link_headers.append('<%s>; rel="previous"' % rv_headers['link-prev']) + if "link-next" in rv_headers: + link_headers.append('<%s>; rel="next"' % rv_headers["link-next"]) + if "link-prev" in rv_headers: + link_headers.append('<%s>; rel="previous"' % rv_headers["link-prev"]) if link_headers: - link_header_str = ','.join(link_headers) - headers = options.get('headers', {}) - headers.update({ - 'Link': link_header_str - }) + link_header_str = ",".join(link_headers) + headers = options.get("headers", {}) + headers.update({"Link": link_header_str}) return headers return {} @@ -68,9 +63,9 @@ If such field is not provided, returns the data as is. 
""" - fields = request.query_params.get('fields') + fields = request.query_params.get("fields") if fields: - fields = set(fields.split(',')) + fields = set(fields.split(",")) data = utils.filter_field_keys(data, fields) return data @@ -87,11 +82,11 @@ key. """ - if 'results' in rv: - return rv['results'] + if "results" in rv: + return rv["results"] - if 'headers' in rv: - rv.pop('headers') + if "headers" in rv: + rv.pop("headers") return rv @@ -110,49 +105,49 @@ """ if data: - options['headers'] = compute_link_header(data, options) + options["headers"] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_env = doc_data headers = {} - if 'headers' in options: - doc_env['headers_data'] = options['headers'] - headers = options['headers'] + if "headers" in options: + doc_env["headers_data"] = options["headers"] + headers = options["headers"] # get request status code - doc_env['status_code'] = options.get('status', 200) + doc_env["status_code"] = options.get("status", 200) - response_args = {'status': doc_env['status_code'], - 'headers': headers, - 'content_type': request.accepted_media_type} + response_args = { + "status": doc_env["status_code"], + "headers": headers, + "content_type": request.accepted_media_type, + } # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation # related ones and inform DRF that we request HTML template rendering - if request.accepted_media_type == 'text/html': + if request.accepted_media_type == "text/html": if data: - data = json.dumps(data, sort_keys=True, - indent=4, - separators=(',', ': ')) - doc_env['response_data'] = data - doc_env['heading'] = shorten_path(str(request.path)) + data = json.dumps(data, sort_keys=True, indent=4, separators=(",", ": ")) + doc_env["response_data"] = data + doc_env["heading"] = shorten_path(str(request.path)) # generate breadcrumbs data - if 'route' in doc_env: - doc_env['endpoint_path'] = gen_path_info(doc_env['route']) - for i in range(len(doc_env['endpoint_path']) - 1): - doc_env['endpoint_path'][i]['path'] += '/doc/' - if not doc_env['noargs']: - doc_env['endpoint_path'][-1]['path'] += '/doc/' + if "route" in doc_env: + doc_env["endpoint_path"] = gen_path_info(doc_env["route"]) + for i in range(len(doc_env["endpoint_path"]) - 1): + doc_env["endpoint_path"][i]["path"] += "/doc/" + if not doc_env["noargs"]: + doc_env["endpoint_path"][-1]["path"] += "/doc/" - response_args['data'] = doc_env - response_args['template_name'] = 'api/apidoc.html' + response_args["data"] = doc_env + response_args["template_name"] = "api/apidoc.html" # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: - response_args['data'] = data + response_args["data"] = data return Response(**response_args) @@ -180,17 +175,16 @@ elif isinstance(error, StorageAPIError): error_code = 503 - error_opts = {'status': error_code} + error_opts = {"status": error_code} error_data = { - 'exception': error.__class__.__name__, - 'reason': str(error), + "exception": error.__class__.__name__, + "reason": str(error), } - if request.accepted_media_type == 'text/html': - error_data['reason'] = escape(error_data['reason']) + if request.accepted_media_type == "text/html": + error_data["reason"] = escape(error_data["reason"]) - if get_config()['debug']: - error_data['traceback'] = traceback.format_exc() + if get_config()["debug"]: + error_data["traceback"] = traceback.format_exc() - return 
make_api_response(request, error_data, doc_data, - options=error_opts) + return make_api_response(request, error_data, doc_data, options=error_opts) diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py --- a/swh/web/api/apiurls.py +++ b/swh/web/api/apiurls.py @@ -22,37 +22,42 @@ generating related urls in API documentation """ + _apidoc_routes = {} # type: Dict[str, Dict[str, str]] - scope = 'api' + scope = "api" @classmethod def get_app_endpoints(cls): return cls._apidoc_routes @classmethod - def add_doc_route(cls, route, docstring, noargs=False, - api_version='1', **kwargs): + def add_doc_route(cls, route, docstring, noargs=False, api_version="1", **kwargs): """ Add a route to the self-documenting API reference """ - route_name = route[1:-1].replace('/', '-') + route_name = route[1:-1].replace("/", "-") if not noargs: - route_name = '%s-doc' % route_name - route_view_name = 'api-%s-%s' % (api_version, route_name) + route_name = "%s-doc" % route_name + route_view_name = "api-%s-%s" % (api_version, route_name) if route not in cls._apidoc_routes: - d = {'docstring': docstring, - 'route': '/api/%s%s' % (api_version, route), - 'route_view_name': route_view_name} + d = { + "docstring": docstring, + "route": "/api/%s%s" % (api_version, route), + "route_view_name": route_view_name, + } for k, v in kwargs.items(): d[k] = v cls._apidoc_routes[route] = d -def api_route(url_pattern=None, view_name=None, - methods=['GET', 'HEAD', 'OPTIONS'], - throttle_scope='swh_api', - api_version='1', - checksum_args=None): +def api_route( + url_pattern=None, + view_name=None, + methods=["GET", "HEAD", "OPTIONS"], + throttle_scope="swh_api", + api_version="1", + checksum_args=None, +): """ Decorator to ease the registration of an API endpoint using the Django REST Framework. @@ -65,7 +70,7 @@ """ - url_pattern = '^' + api_version + url_pattern + '$' + url_pattern = "^" + api_version + url_pattern + "$" def decorator(f): # create a DRF view from the wrapped function @@ -74,18 +79,18 @@ @functools.wraps(f) def api_view_f(*args, **kwargs): return f(*args, **kwargs) + # small hacks for correctly generating API endpoints index doc api_view_f.__name__ = f.__name__ api_view_f.http_method_names = methods # register the route and its view in the endpoints index - APIUrls.add_url_pattern(url_pattern, api_view_f, - view_name) + APIUrls.add_url_pattern(url_pattern, api_view_f, view_name) if checksum_args: - APIUrls.add_redirect_for_checksum_args(view_name, - [url_pattern], - checksum_args) + APIUrls.add_redirect_for_checksum_args( + view_name, [url_pattern], checksum_args + ) return f return decorator diff --git a/swh/web/api/renderers.py b/swh/web/api/renderers.py --- a/swh/web/api/renderers.py +++ b/swh/web/api/renderers.py @@ -13,9 +13,9 @@ Renderer which serializes to YAML. """ - media_type = 'application/yaml' - format = 'yaml' - charset = 'utf-8' + media_type = "application/yaml" + format = "yaml" + charset = "utf-8" ensure_ascii = False default_flow_style = False @@ -23,15 +23,15 @@ """ Renders `data` into serialized YAML. 
""" - assert yaml, 'YAMLRenderer requires pyyaml to be installed' + assert yaml, "YAMLRenderer requires pyyaml to be installed" if data is None: - return '' + return "" return yaml.dump( data, stream=None, encoding=self.charset, allow_unicode=not self.ensure_ascii, - default_flow_style=self.default_flow_style + default_flow_style=self.default_flow_style, ) diff --git a/swh/web/api/throttling.py b/swh/web/api/throttling.py --- a/swh/web/api/throttling.py +++ b/swh/web/api/throttling.py @@ -3,9 +3,7 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from ipaddress import ( - ip_address, ip_network, IPv4Network, IPv6Network -) +from ipaddress import ip_address, ip_network, IPv4Network, IPv6Network from typing import Callable, List, TypeVar, Union from django.core.exceptions import ImproperlyConfigured @@ -16,7 +14,7 @@ from swh.web.config import get_config -APIView = TypeVar('APIView', bound='rest_framework.views.APIView') +APIView = TypeVar("APIView", bound="rest_framework.views.APIView") Request = rest_framework.request.Request @@ -62,16 +60,18 @@ super().__init__() self.exempted_networks = None - def get_exempted_networks(self, scope_name: str - ) -> List[Union[IPv4Network, IPv6Network]]: + def get_exempted_networks( + self, scope_name: str + ) -> List[Union[IPv4Network, IPv6Network]]: if not self.exempted_networks: - scopes = get_config()['throttling']['scopes'] + scopes = get_config()["throttling"]["scopes"] scope = scopes.get(scope_name) if scope: - networks = scope.get('exempted_networks') + networks = scope.get("exempted_networks") if networks: - self.exempted_networks = [ip_network(network) - for network in networks] + self.exempted_networks = [ + ip_network(network) for network in networks + ] return self.exempted_networks def allow_request(self, request: Request, view: APIView) -> bool: @@ -87,7 +87,7 @@ # check if there is a specific rate limiting associated # to the request type assert request.method is not None - request_scope = f'{default_scope}_{request.method.lower()}' + request_scope = f"{default_scope}_{request.method.lower()}" setattr(view, self.scope_attr, request_scope) try: request_allowed = super().allow_request(request, view) @@ -104,7 +104,7 @@ default_scope = self.scope # check if there is a specific rate limiting associated # to the request type - self.scope = default_scope + '_' + request.method.lower() + self.scope = default_scope + "_" + request.method.lower() try: self.rate = self.get_rate() # use default rate limiting otherwise @@ -114,8 +114,9 @@ self.rate = self.get_rate() self.num_requests, self.duration = self.parse_rate(self.rate) - request_allowed = \ - super(ScopedRateThrottle, self).allow_request(request, view) + request_allowed = super(ScopedRateThrottle, self).allow_request( + request, view + ) self.scope = default_scope exempted_networks = self.get_exempted_networks(default_scope) @@ -123,20 +124,21 @@ if exempted_networks: remote_address = ip_address(self.get_ident(request)) - exempted_ip = any(remote_address in network - for network in exempted_networks) + exempted_ip = any( + remote_address in network for network in exempted_networks + ) request_allowed = exempted_ip or request_allowed # set throttling related data in the request metadata # in order for the ThrottlingHeadersMiddleware to # add X-RateLimit-* headers in the HTTP response - if not exempted_ip and hasattr(self, 'history'): + if not exempted_ip and hasattr(self, "history"): hit_count = len(self.history) - 
request.META['RateLimit-Limit'] = self.num_requests - request.META['RateLimit-Remaining'] = self.num_requests - hit_count + request.META["RateLimit-Limit"] = self.num_requests + request.META["RateLimit-Remaining"] = self.num_requests - hit_count wait = self.wait() if wait is not None: - request.META['RateLimit-Reset'] = int(self.now + wait) + request.META["RateLimit-Reset"] = int(self.now + wait) return request_allowed @@ -151,12 +153,12 @@ ... """ + def decorator(func: APIView) -> APIView: SwhScopeRateThrottle = type( - 'CustomScopeRateThrottle', - (SwhWebRateThrottle,), - {'scope': scope} + "CustomScopeRateThrottle", (SwhWebRateThrottle,), {"scope": scope} ) - func.throttle_classes = (SwhScopeRateThrottle, ) + func.throttle_classes = (SwhScopeRateThrottle,) return func + return decorator diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py --- a/swh/web/api/urls.py +++ b/swh/web/api/urls.py @@ -3,17 +3,17 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -import swh.web.api.views.content # noqa -import swh.web.api.views.directory # noqa -import swh.web.api.views.identifiers # noqa -import swh.web.api.views.origin # noqa -import swh.web.api.views.origin_save # noqa -import swh.web.api.views.release # noqa -import swh.web.api.views.revision # noqa -import swh.web.api.views.snapshot # noqa -import swh.web.api.views.stat # noqa -import swh.web.api.views.vault # noqa -import swh.web.api.views.ping # noqa +import swh.web.api.views.content # noqa +import swh.web.api.views.directory # noqa +import swh.web.api.views.identifiers # noqa +import swh.web.api.views.origin # noqa +import swh.web.api.views.origin_save # noqa +import swh.web.api.views.release # noqa +import swh.web.api.views.revision # noqa +import swh.web.api.views.snapshot # noqa +import swh.web.api.views.stat # noqa +import swh.web.api.views.vault # noqa +import swh.web.api.views.ping # noqa from swh.web.api.apiurls import APIUrls diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py --- a/swh/web/api/utils.py +++ b/swh/web/api/utils.py @@ -41,11 +41,12 @@ """Map a person (person, committer, tagger, etc...) to a string. """ - return ''.join([person['name'], ' <', person['email'], '>']) + return "".join([person["name"], " <", person["email"], ">"]) -def enrich_object(object: Dict[str, str], - request: Optional[HttpRequest] = None) -> Dict[str, str]: +def enrich_object( + object: Dict[str, str], request: Optional[HttpRequest] = None +) -> Dict[str, str]: """Enrich an object (revision, release) with link to the 'target' of type 'target_type'. 
@@ -59,22 +60,25 @@ directory) """ - if 'target' in object and 'target_type' in object: - if object['target_type'] in ('revision', 'release', 'directory'): - object['target_url'] = reverse( - 'api-1-%s' % object['target_type'], - url_args={'sha1_git': object['target']}, - request=request) - elif object['target_type'] == 'content': - object['target_url'] = reverse( - 'api-1-content', - url_args={'q': 'sha1_git:' + object['target']}, - request=request) - elif object['target_type'] == 'snapshot': - object['target_url'] = reverse( - 'api-1-snapshot', - url_args={'snapshot_id': object['target']}, - request=request) + if "target" in object and "target_type" in object: + if object["target_type"] in ("revision", "release", "directory"): + object["target_url"] = reverse( + "api-1-%s" % object["target_type"], + url_args={"sha1_git": object["target"]}, + request=request, + ) + elif object["target_type"] == "content": + object["target_url"] = reverse( + "api-1-content", + url_args={"q": "sha1_git:" + object["target"]}, + request=request, + ) + elif object["target_type"] == "snapshot": + object["target_url"] = reverse( + "api-1-snapshot", + url_args={"snapshot_id": object["target"]}, + request=request, + ) return object @@ -82,8 +86,9 @@ enrich_release = enrich_object -def enrich_directory(directory: Dict[str, str], - request: Optional[HttpRequest] = None) -> Dict[str, str]: +def enrich_directory( + directory: Dict[str, str], request: Optional[HttpRequest] = None +) -> Dict[str, str]: """Enrich directory with url to content or directory. Args: @@ -93,31 +98,28 @@ Returns: An enriched directory dict filled with additional urls """ - if 'type' in directory: - target_type = directory['type'] - target = directory['target'] - if target_type == 'file': - directory['target_url'] = reverse( - 'api-1-content', - url_args={'q': 'sha1_git:%s' % target}, - request=request) - elif target_type == 'dir': - directory['target_url'] = reverse( - 'api-1-directory', - url_args={'sha1_git': target}, - request=request) + if "type" in directory: + target_type = directory["type"] + target = directory["target"] + if target_type == "file": + directory["target_url"] = reverse( + "api-1-content", url_args={"q": "sha1_git:%s" % target}, request=request + ) + elif target_type == "dir": + directory["target_url"] = reverse( + "api-1-directory", url_args={"sha1_git": target}, request=request + ) else: - directory['target_url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': target}, - request=request) + directory["target_url"] = reverse( + "api-1-revision", url_args={"sha1_git": target}, request=request + ) return directory -def enrich_metadata_endpoint(content_metadata: Dict[str, str], - request: Optional[HttpRequest] = None - ) -> Dict[str, str]: +def enrich_metadata_endpoint( + content_metadata: Dict[str, str], request: Optional[HttpRequest] = None +) -> Dict[str, str]: """Enrich content metadata dict with link to the upper metadata endpoint. 
Args: @@ -128,16 +130,18 @@ An enriched content metadata dict filled with an additional url """ c = content_metadata - c['content_url'] = reverse('api-1-content', - url_args={'q': 'sha1:%s' % c['id']}, - request=request) + c["content_url"] = reverse( + "api-1-content", url_args={"q": "sha1:%s" % c["id"]}, request=request + ) return c -def enrich_content(content: Dict[str, Any], - top_url: Optional[bool] = False, - query_string: Optional[str] = None, - request: Optional[HttpRequest] = None) -> Dict[str, str]: +def enrich_content( + content: Dict[str, Any], + top_url: Optional[bool] = False, + query_string: Optional[str] = None, + request: Optional[HttpRequest] = None, +) -> Dict[str, str]: """Enrich content with links to: - data_url: its raw data - filetype_url: its filetype information @@ -159,37 +163,34 @@ """ checksums = content - if 'checksums' in content: - checksums = content['checksums'] - hash_algo = 'sha1' + if "checksums" in content: + checksums = content["checksums"] + hash_algo = "sha1" if query_string: hash_algo = parse_hash(query_string)[0] if hash_algo in checksums: - q = '%s:%s' % (hash_algo, checksums[hash_algo]) + q = "%s:%s" % (hash_algo, checksums[hash_algo]) if top_url: - content['content_url'] = reverse( - 'api-1-content', url_args={'q': q}) - content['data_url'] = reverse('api-1-content-raw', - url_args={'q': q}, - request=request) - content['filetype_url'] = reverse( - 'api-1-content-filetype', - url_args={'q': q}, - request=request) - content['language_url'] = reverse( - 'api-1-content-language', - url_args={'q': q}, - request=request) - content['license_url'] = reverse( - 'api-1-content-license', - url_args={'q': q}, - request=request) + content["content_url"] = reverse("api-1-content", url_args={"q": q}) + content["data_url"] = reverse( + "api-1-content-raw", url_args={"q": q}, request=request + ) + content["filetype_url"] = reverse( + "api-1-content-filetype", url_args={"q": q}, request=request + ) + content["language_url"] = reverse( + "api-1-content-language", url_args={"q": q}, request=request + ) + content["license_url"] = reverse( + "api-1-content-license", url_args={"q": q}, request=request + ) return content -def enrich_revision(revision: Dict[str, Any], - request: Optional[HttpRequest] = None) -> Dict[str, Any]: +def enrich_revision( + revision: Dict[str, Any], request: Optional[HttpRequest] = None +) -> Dict[str, Any]: """Enrich revision with links where it makes sense (directory, parents). Keep track of the navigation breadcrumbs if they are specified. 
@@ -201,51 +202,55 @@ An enriched revision dict filled with additional urls """ - revision['url'] = reverse('api-1-revision', - url_args={'sha1_git': revision['id']}, - request=request) - revision['history_url'] = reverse('api-1-revision-log', - url_args={'sha1_git': revision['id']}, - request=request) - - if 'directory' in revision: - revision['directory_url'] = reverse( - 'api-1-directory', - url_args={'sha1_git': revision['directory']}, - request=request) - - if 'parents' in revision: + revision["url"] = reverse( + "api-1-revision", url_args={"sha1_git": revision["id"]}, request=request + ) + revision["history_url"] = reverse( + "api-1-revision-log", url_args={"sha1_git": revision["id"]}, request=request + ) + + if "directory" in revision: + revision["directory_url"] = reverse( + "api-1-directory", + url_args={"sha1_git": revision["directory"]}, + request=request, + ) + + if "parents" in revision: parents = [] - for parent in revision['parents']: - parents.append({ - 'id': parent, - 'url': reverse('api-1-revision', - url_args={'sha1_git': parent}, - request=request) - }) - - revision['parents'] = parents - - if 'children' in revision: + for parent in revision["parents"]: + parents.append( + { + "id": parent, + "url": reverse( + "api-1-revision", url_args={"sha1_git": parent}, request=request + ), + } + ) + + revision["parents"] = parents + + if "children" in revision: children = [] - for child in revision['children']: - children.append(reverse( - 'api-1-revision', - url_args={'sha1_git': child}, - request=request)) - revision['children_urls'] = children - - if 'message_decoding_failed' in revision: - revision['message_url'] = reverse( - 'api-1-revision-raw-message', - url_args={'sha1_git': revision['id']}, - request=request) + for child in revision["children"]: + children.append( + reverse("api-1-revision", url_args={"sha1_git": child}, request=request) + ) + revision["children_urls"] = children + + if "message_decoding_failed" in revision: + revision["message_url"] = reverse( + "api-1-revision-raw-message", + url_args={"sha1_git": revision["id"]}, + request=request, + ) return revision -def enrich_snapshot(snapshot: Dict[str, Any], - request: Optional[HttpRequest] = None) -> Dict[str, Any]: +def enrich_snapshot( + snapshot: Dict[str, Any], request: Optional[HttpRequest] = None +) -> Dict[str, Any]: """Enrich snapshot with links to the branch targets Args: @@ -255,22 +260,23 @@ Returns: An enriched snapshot dict filled with additional urls """ - if 'branches' in snapshot: - snapshot['branches'] = { + if "branches" in snapshot: + snapshot["branches"] = { k: enrich_object(v, request) if v else None - for k, v in snapshot['branches'].items() + for k, v in snapshot["branches"].items() } - for k, v in snapshot['branches'].items(): - if v and v['target_type'] == 'alias': + for k, v in snapshot["branches"].items(): + if v and v["target_type"] == "alias": branch = resolve_branch_alias(snapshot, v) if branch: branch = enrich_object(branch, request) - v['target_url'] = branch['target_url'] + v["target_url"] = branch["target_url"] return snapshot -def enrich_origin(origin: Dict[str, Any], - request: Optional[HttpRequest] = None) -> Dict[str, Any]: +def enrich_origin( + origin: Dict[str, Any], request: Optional[HttpRequest] = None +) -> Dict[str, Any]: """Enrich origin dict with link to its visits Args: @@ -280,19 +286,23 @@ Returns: An enriched origin dict filled with an additional url """ - if 'url' in origin: - origin['origin_visits_url'] = reverse( - 'api-1-origin-visits', - 
url_args={'origin_url': origin['url']},
-            request=request)
+    if "url" in origin:
+        origin["origin_visits_url"] = reverse(
+            "api-1-origin-visits",
+            url_args={"origin_url": origin["url"]},
+            request=request,
+        )
     return origin


-def enrich_origin_visit(origin_visit: Dict[str, Any], *,
-                        with_origin_link: bool, with_origin_visit_link: bool,
-                        request: Optional[HttpRequest] = None
-                        ) -> Dict[str, Any]:
+def enrich_origin_visit(
+    origin_visit: Dict[str, Any],
+    *,
+    with_origin_link: bool,
+    with_origin_visit_link: bool,
+    request: Optional[HttpRequest] = None,
+) -> Dict[str, Any]:
     """Enrich origin visit dict with additional links

     Args:
@@ -306,19 +316,20 @@
     """
     ov = origin_visit
     if with_origin_link:
-        ov['origin_url'] = reverse('api-1-origin',
-                                   url_args={'origin_url': ov['origin']},
-                                   request=request)
+        ov["origin_url"] = reverse(
+            "api-1-origin", url_args={"origin_url": ov["origin"]}, request=request
+        )
     if with_origin_visit_link:
-        ov['origin_visit_url'] = reverse('api-1-origin-visit',
-                                         url_args={'origin_url': ov['origin'],
-                                                   'visit_id': ov['visit']},
-                                         request=request)
-    snapshot = ov['snapshot']
+        ov["origin_visit_url"] = reverse(
+            "api-1-origin-visit",
+            url_args={"origin_url": ov["origin"], "visit_id": ov["visit"]},
+            request=request,
+        )
+    snapshot = ov["snapshot"]
     if snapshot:
-        ov['snapshot_url'] = reverse('api-1-snapshot',
-                                     url_args={'snapshot_id': snapshot},
-                                     request=request)
+        ov["snapshot_url"] = reverse(
+            "api-1-snapshot", url_args={"snapshot_id": snapshot}, request=request
+        )
     else:
-        ov['snapshot_url'] = None
+        ov["snapshot_url"] = None
     return ov
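All of the `enrich_*` helpers above follow one pattern: take the dict returned by the service layer and graft navigation `*_url` fields onto it with `reverse()`. A minimal standalone sketch of that pattern, with purely illustrative names (`build_url` stands in for swh-web's `reverse` and is not part of this diff):

from typing import Any, Callable, Dict


def enrich_with_url(
    obj: Dict[str, Any], build_url: Callable[[str], str]
) -> Dict[str, Any]:
    # Mutate the looked-up dict in place, deriving a navigation URL from
    # an identifier the dict already carries, which is the shape of the
    # enrich_* helpers reformatted above.
    if "id" in obj:
        obj["object_url"] = build_url(obj["id"])
    return obj


print(enrich_with_url({"id": "abc"}, lambda i: "/api/1/object/%s/" % i))
# -> {'id': 'abc', 'object_url': '/api/1/object/abc/'}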
diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py
--- a/swh/web/api/views/content.py
+++ b/swh/web/api/views/content.py
@@ -16,9 +16,12 @@
 from swh.web.api.views.utils import api_lookup


-@api_route(r'/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/filetype/',
-           'api-1-content-filetype', checksum_args=['q'])
-@api_doc('/content/filetype/')
+@api_route(
+    r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/filetype/",
+    "api-1-content-filetype",
+    checksum_args=["q"],
+)
+@api_doc("/content/filetype/")
 @format_docstring()
 def api_content_filetype(request, q):
     """
@@ -26,19 +29,21 @@

         Get information about the detected MIME type of a content object.

-        :param string hash_type: optional parameter specifying which hashing algorithm has been used
-            to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256``
-            or ``blake2s256``. If that parameter is not provided, it is assumed that the
-            hashing algorithm used is `sha1`.
-        :param string hash: hexadecimal representation of the checksum value computed with
-            the specified hashing algorithm.
+        :param string hash_type: optional parameter specifying which hashing algorithm
+            has been used to compute the content checksum. It can be either ``sha1``,
+            ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
+            provided, it is assumed that the hashing algorithm used is `sha1`.
+        :param string hash: hexadecimal representation of the checksum value computed
+            with the specified hashing algorithm.

-        :>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for
-            getting information about the content
+        :>json object content_url: link to
+            :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information
+            about the content
         :>json string encoding: the detected content encoding
         :>json string id: the **sha1** identifier of the content
         :>json string mimetype: the detected MIME type of the content
-        :>json object tool: information about the tool used to detect the content filetype
+        :>json object tool: information about the tool used to detect the content
+            filetype

         {common_headers}

@@ -51,17 +56,22 @@

         .. parsed-literal::

            :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/filetype/`
-    """ # noqa
+    """
     return api_lookup(
-        service.lookup_content_filetype, q,
-        notfound_msg='No filetype information found for content {}.'.format(q),
+        service.lookup_content_filetype,
+        q,
+        notfound_msg="No filetype information found for content {}.".format(q),
         enrich_fn=utils.enrich_metadata_endpoint,
-        request=request)
+        request=request,
+    )


-@api_route(r'/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/language/',
-           'api-1-content-language', checksum_args=['q'])
-@api_doc('/content/language/')
+@api_route(
+    r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/language/",
+    "api-1-content-language",
+    checksum_args=["q"],
+)
+@api_doc("/content/language/")
 @format_docstring()
 def api_content_language(request, q):
     """
@@ -71,18 +81,20 @@

         Note: this endpoint currently returns no data.

-        :param string hash_type: optional parameter specifying which hashing algorithm has been used
-            to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256``
-            or ``blake2s256``. If that parameter is not provided, it is assumed that the
-            hashing algorithm used is ``sha1``.
-        :param string hash: hexadecimal representation of the checksum value computed with
-            the specified hashing algorithm.
+        :param string hash_type: optional parameter specifying which hashing algorithm
+            has been used to compute the content checksum. It can be either ``sha1``,
+            ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
+            provided, it is assumed that the hashing algorithm used is ``sha1``.
+        :param string hash: hexadecimal representation of the checksum value computed
+            with the specified hashing algorithm.

-        :>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for
-            getting information about the content
+        :>json object content_url: link to
+            :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information
+            about the content
         :>json string id: the **sha1** identifier of the content
         :>json string lang: the detected programming language if any
-        :>json object tool: information about the tool used to detect the programming language
+        :>json object tool: information about the tool used to detect the programming
+            language

         {common_headers}

@@ -95,17 +107,22 @@
        .. parsed-literal::

           :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/language/`
-    """ # noqa
+    """
     return api_lookup(
-        service.lookup_content_language, q,
-        notfound_msg='No language information found for content {}.'.format(q),
+        service.lookup_content_language,
+        q,
+        notfound_msg="No language information found for content {}.".format(q),
         enrich_fn=utils.enrich_metadata_endpoint,
-        request=request)
+        request=request,
+    )


-@api_route(r'/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/license/',
-           'api-1-content-license', checksum_args=['q'])
-@api_doc('/content/license/')
+@api_route(
+    r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/license/",
+    "api-1-content-license",
+    checksum_args=["q"],
+)
+@api_doc("/content/license/")
 @format_docstring()
 def api_content_license(request, q):
     """
@@ -113,17 +130,18 @@

         Get information about the license of a content object.

-        :param string hash_type: optional parameter specifying which hashing algorithm has been used
-            to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256``
-            or ``blake2s256``. If that parameter is not provided, it is assumed that the
-            hashing algorithm used is ``sha1``.
-        :param string hash: hexadecimal representation of the checksum value computed with
-            the specified hashing algorithm.
+        :param string hash_type: optional parameter specifying which hashing algorithm
+            has been used to compute the content checksum. It can be either ``sha1``,
+            ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
+            provided, it is assumed that the hashing algorithm used is ``sha1``.
+        :param string hash: hexadecimal representation of the checksum value computed
+            with the specified hashing algorithm.

-        :>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for
-            getting information about the content
+        :>json object content_url: link to
+            :http:get:`/api/1/content/[(hash_type):](hash)/` for getting information
+            about the content
         :>json string id: the **sha1** identifier of the content
-        :>json array licenses: array of strings containing the detected license names if any
+        :>json array licenses: array of strings containing the detected license names
         :>json object tool: information about the tool used to detect the license

         {common_headers}

@@ -137,45 +155,52 @@

         .. parsed-literal::

           :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/license/`
-    """ # noqa
+    """
     return api_lookup(
-        service.lookup_content_license, q,
-        notfound_msg='No license information found for content {}.'.format(q),
+        service.lookup_content_license,
+        q,
+        notfound_msg="No license information found for content {}.".format(q),
         enrich_fn=utils.enrich_metadata_endpoint,
-        request=request)
+        request=request,
+    )
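The filetype, language and license endpoints above all share the `[(hash_type):](hash)` addressing scheme from their docstrings. A hedged client-side sketch of the two equivalent addressings; the base URL assumes the public archive deployment and is not part of this diff:

import requests

BASE = "https://archive.softwareheritage.org/api/1"  # assumed deployment
SHA1 = "dc2830a9e72f23c1dfebef4413003221baa5fb62"

# Without a "hash_type:" prefix the server assumes sha1, per the docstrings.
implicit = requests.get("%s/content/%s/license/" % (BASE, SHA1), timeout=30)
# The explicit form names the algorithm used to compute the checksum.
explicit = requests.get("%s/content/sha1:%s/license/" % (BASE, SHA1), timeout=30)

assert implicit.json()["id"] == explicit.json()["id"]  # same content object
print(implicit.json().get("licenses"))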
""" return api_lookup( - service.lookup_content_ctags, q, - notfound_msg='No ctags symbol found for content {}.'.format(q), + service.lookup_content_ctags, + q, + notfound_msg="No ctags symbol found for content {}.".format(q), enrich_fn=utils.enrich_metadata_endpoint, - request=request) + request=request, + ) -@api_route(r'/content/(?P[0-9a-z_:]*[0-9a-f]+)/raw/', 'api-1-content-raw', - checksum_args=['q']) -@api_doc('/content/raw/', handle_response=True) +@api_route( + r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/raw/", + "api-1-content-raw", + checksum_args=["q"], +) +@api_doc("/content/raw/", handle_response=True) def api_content_raw(request, q): """ .. http:get:: /api/1/content/[(hash_type):](hash)/raw/ Get the raw content of a content object (aka a "blob"), as a byte sequence. - :param string hash_type: optional parameter specifying which hashing algorithm has been used - to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256`` - or ``blake2s256``. If that parameter is not provided, it is assumed that the - hashing algorithm used is ``sha1``. - :param string hash: hexadecimal representation of the checksum value computed with - the specified hashing algorithm. - :query string filename: if provided, the downloaded content will get that filename + :param string hash_type: optional parameter specifying which hashing algorithm + has been used to compute the content checksum. It can be either ``sha1``, + ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not + provided, it is assumed that the hashing algorithm used is ``sha1``. + :param string hash: hexadecimal representation of the checksum value computed + with the specified hashing algorithm. + :query string filename: if provided, the downloaded content will get that + filename :resheader Content-Type: application/octet-stream @@ -188,71 +213,76 @@ .. parsed-literal:: :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/` - """ # noqa + """ + def generate(content): - yield content['data'] + yield content["data"] content_raw = service.lookup_content_raw(q) if not content_raw: - raise NotFoundExc('Content %s is not found.' % q) + raise NotFoundExc("Content %s is not found." % q) - filename = request.query_params.get('filename') + filename = request.query_params.get("filename") if not filename: - filename = 'content_%s_raw' % q.replace(':', '_') + filename = "content_%s_raw" % q.replace(":", "_") - response = HttpResponse(generate(content_raw), - content_type='application/octet-stream') - response['Content-disposition'] = 'attachment; filename=%s' % filename + response = HttpResponse( + generate(content_raw), content_type="application/octet-stream" + ) + response["Content-disposition"] = "attachment; filename=%s" % filename return response -@api_route(r'/content/symbol/(?P.+)/', 'api-1-content-symbol') -@api_doc('/content/symbol/', tags=['hidden']) +@api_route(r"/content/symbol/(?P.+)/", "api-1-content-symbol") +@api_doc("/content/symbol/", tags=["hidden"]) def api_content_symbol(request, q=None): """Search content objects by `Ctags `_-style symbol (e.g., function name, data type, method, ...). 
""" result = {} - last_sha1 = request.query_params.get('last_sha1', None) - per_page = int(request.query_params.get('per_page', '10')) + last_sha1 = request.query_params.get("last_sha1", None) + per_page = int(request.query_params.get("per_page", "10")) def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): exp = list(service.lookup_expression(exp, last_sha1, per_page)) return exp if exp else None symbols = api_lookup( - lookup_exp, q, + lookup_exp, + q, notfound_msg="No indexed raw content match expression '{}'.".format(q), enrich_fn=functools.partial(utils.enrich_content, top_url=True), - request=request) + request=request, + ) if symbols: nb_symbols = len(symbols) if nb_symbols == per_page: query_params = {} - new_last_sha1 = symbols[-1]['sha1'] - query_params['last_sha1'] = new_last_sha1 - if request.query_params.get('per_page'): - query_params['per_page'] = per_page - - result['headers'] = { - 'link-next': reverse('api-1-content-symbol', url_args={'q': q}, - query_params=query_params, - request=request) + new_last_sha1 = symbols[-1]["sha1"] + query_params["last_sha1"] = new_last_sha1 + if request.query_params.get("per_page"): + query_params["per_page"] = per_page + + result["headers"] = { + "link-next": reverse( + "api-1-content-symbol", + url_args={"q": q}, + query_params=query_params, + request=request, + ) } - result.update({ - 'results': symbols - }) + result.update({"results": symbols}) return result -@api_route(r'/content/known/search/', 'api-1-content-known', methods=['POST']) -@api_route(r'/content/known/(?P(?!search).*)/', 'api-1-content-known') -@api_doc('/content/known/', tags=['hidden']) +@api_route(r"/content/known/search/", "api-1-content-known", methods=["POST"]) +@api_route(r"/content/known/(?P(?!search).*)/", "api-1-content-known") +@api_doc("/content/known/", tags=["hidden"]) @format_docstring() def api_check_content_known(request, q=None): """ @@ -262,14 +292,15 @@ based on its **sha1** checksum. :param string sha1: hexadecimal representation of the **sha1** checksum value - for the content to check existence. Multiple values can be provided separated - by ','. + for the content to check existence. Multiple values can be provided + separated by ','. {common_headers} - :>json array search_res: array holding the search result for each provided **sha1** - :>json object search_stats: some statistics regarding the number of **sha1** provided - and the percentage of those found in the archive + :>json array search_res: array holding the search result for each provided + **sha1** + :>json object search_stats: some statistics regarding the number of **sha1** + provided and the percentage of those found in the archive :statuscode 200: no error :statuscode 400: an invalid **sha1** has been provided @@ -279,53 +310,52 @@ .. 
        .. parsed-literal::

           :swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/`
-    """ # noqa
-    response = {'search_res': None,
-                'search_stats': None}
-    search_stats = {'nbfiles': 0, 'pct': 0}
+    """
+    response = {"search_res": None, "search_stats": None}
+    search_stats = {"nbfiles": 0, "pct": 0}
     search_res = None

     queries = []
     # GET: Many hash separated values request
     if q:
-        hashes = q.split(',')
+        hashes = q.split(",")
         for v in hashes:
-            queries.append({'filename': None, 'sha1': v})
+            queries.append({"filename": None, "sha1": v})

     # POST: Many hash requests in post form submission
-    elif request.method == 'POST':
+    elif request.method == "POST":
         data = request.data
         # Remove potential inputs with no associated value
         for k, v in data.items():
             if v is not None:
-                if k == 'q' and len(v) > 0:
-                    queries.append({'filename': None, 'sha1': v})
-                elif v != '':
-                    queries.append({'filename': k, 'sha1': v})
+                if k == "q" and len(v) > 0:
+                    queries.append({"filename": None, "sha1": v})
+                elif v != "":
+                    queries.append({"filename": k, "sha1": v})

     if queries:
         lookup = service.lookup_multiple_hashes(queries)
         result = []
         nb_queries = len(queries)
         for el in lookup:
-            res_d = {'sha1': el['sha1'],
-                     'found': el['found']}
-            if 'filename' in el and el['filename']:
-                res_d['filename'] = el['filename']
+            res_d = {"sha1": el["sha1"], "found": el["found"]}
+            if "filename" in el and el["filename"]:
+                res_d["filename"] = el["filename"]
             result.append(res_d)
             search_res = result
-            nbfound = len([x for x in lookup if x['found']])
-            search_stats['nbfiles'] = nb_queries
-            search_stats['pct'] = (nbfound / nb_queries) * 100
+            nbfound = len([x for x in lookup if x["found"]])
+            search_stats["nbfiles"] = nb_queries
+            search_stats["pct"] = (nbfound / nb_queries) * 100

-    response['search_res'] = search_res
-    response['search_stats'] = search_stats
+    response["search_res"] = search_res
+    response["search_stats"] = search_stats
     return response
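The GET branch of `api_check_content_known` above accepts comma-separated checksums in the URL path and reports per-hash hits plus aggregate stats. A hedged client sketch, again assuming the public archive base URL (not part of this diff):

import requests

BASE = "https://archive.softwareheritage.org/api/1"  # assumed deployment
sha1s = [
    "dc2830a9e72f23c1dfebef4413003221baa5fb62",
    "0c3f19cb47ebfbe643fb19fa94c874d18fa62d12",
]
# GET branch: comma-separated sha1s in the URL path, as parsed above.
res = requests.get("%s/content/known/%s/" % (BASE, ",".join(sha1s)), timeout=30)
data = res.json()
for entry in data["search_res"]:
    print(entry["sha1"], "found" if entry["found"] else "missing")
print(data["search_stats"])  # {"nbfiles": ..., "pct": ...}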
-@api_route(r'/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/', 'api-1-content',
-           checksum_args=['q'])
-@api_doc('/content/')
+@api_route(
+    r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/", "api-1-content", checksum_args=["q"]
+)
+@api_doc("/content/")
 @format_docstring()
 def api_content_metadata(request, q):
     """
@@ -335,24 +365,29 @@

         In the archive, a content object is identified based on checksum
         values computed using various hashing algorithms.

-        :param string hash_type: optional parameter specifying which hashing algorithm has been used
-            to compute the content checksum. It can be either ``sha1``, ``sha1_git``, ``sha256``
-            or ``blake2s256``. If that parameter is not provided, it is assumed that the
-            hashing algorithm used is ``sha1``.
-        :param string hash: hexadecimal representation of the checksum value computed with
-            the specified hashing algorithm.
+        :param string hash_type: optional parameter specifying which hashing algorithm
+            has been used to compute the content checksum. It can be either ``sha1``,
+            ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
+            provided, it is assumed that the hashing algorithm used is ``sha1``.
+        :param string hash: hexadecimal representation of the checksum value computed
+            with the specified hashing algorithm.

         {common_headers}

-        :>json object checksums: object holding the computed checksum values for the requested content
-        :>json string data_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/raw/`
+        :>json object checksums: object holding the computed checksum values for the
+            requested content
+        :>json string data_url: link to
+            :http:get:`/api/1/content/[(hash_type):](hash)/raw/`
             for downloading the content raw bytes
-        :>json string filetype_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/filetype/`
+        :>json string filetype_url: link to
+            :http:get:`/api/1/content/[(hash_type):](hash)/filetype/`
             for getting information about the content MIME type
-        :>json string language_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/language/`
+        :>json string language_url: link to
+            :http:get:`/api/1/content/[(hash_type):](hash)/language/`
             for getting information about the programming language used in the content
         :>json number length: length of the content in bytes
-        :>json string license_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/license/`
+        :>json string license_url: link to
+            :http:get:`/api/1/content/[(hash_type):](hash)/license/`
            for getting information about the license of the content

         :statuscode 200: no error
@@ -363,10 +398,12 @@

         .. parsed-literal::

-            curl -i :swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/`
-    """ # noqa
+            :swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/`
+    """
     return api_lookup(
-        service.lookup_content, q,
-        notfound_msg='Content with {} not found.'.format(q),
+        service.lookup_content,
+        q,
+        notfound_msg="Content with {} not found.".format(q),
         enrich_fn=functools.partial(utils.enrich_content, query_string=q),
-        request=request)
+        request=request,
+    )
diff --git a/swh/web/api/views/directory.py b/swh/web/api/views/directory.py
--- a/swh/web/api/views/directory.py
+++ b/swh/web/api/views/directory.py
@@ -10,44 +10,55 @@
 from swh.web.api.views.utils import api_lookup


-@api_route(r'/directory/(?P<sha1_git>[0-9a-f]+)/', 'api-1-directory',
-           checksum_args=['sha1_git'])
-@api_route(r'/directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/',
-           'api-1-directory',
-           checksum_args=['sha1_git'])
-@api_doc('/directory/')
+@api_route(
+    r"/directory/(?P<sha1_git>[0-9a-f]+)/",
+    "api-1-directory",
+    checksum_args=["sha1_git"],
+)
+@api_route(
+    r"/directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/",
+    "api-1-directory",
+    checksum_args=["sha1_git"],
+)
+@api_doc("/directory/")
 @format_docstring()
 def api_directory(request, sha1_git, path=None):
     """
     .. http:get:: /api/1/directory/(sha1_git)/[(path)/]

         Get information about directory objects.

-        Directories are identified by **sha1** checksums, compatible with Git directory identifiers.
-        See :func:`swh.model.identifiers.directory_identifier` in our data model module for details
-        about how they are computed.
+        Directories are identified by **sha1** checksums, compatible with Git
+        directory identifiers.
+        See :func:`swh.model.identifiers.directory_identifier` in our data model
+        module for details about how they are computed.

-        When given only a directory identifier, this endpoint returns information about the directory itself,
-        returning its content (usually a list of directory entries). When given a directory identifier and a
-        path, this endpoint returns information about the directory entry pointed by the relative path,
-        starting path resolution from the given directory.
+ When given only a directory identifier, this endpoint returns information + about the directory itself, returning its content (usually a list of directory + entries). When given a directory identifier and a path, this endpoint returns + information about the directory entry pointed by the relative path, starting + path resolution from the given directory. - :param string sha1_git: hexadecimal representation of the directory **sha1_git** identifier - :param string path: optional parameter to get information about the directory entry - pointed by that relative path + :param string sha1_git: hexadecimal representation of the directory + **sha1_git** identifier + :param string path: optional parameter to get information about the + directory entry pointed by that relative path {common_headers} - :>jsonarr object checksums: object holding the computed checksum values for a directory entry - (only for file entries) + :>jsonarr object checksums: object holding the computed checksum values for + a directory entry (only for file entries) :>jsonarr string dir_id: **sha1_git** identifier of the requested directory - :>jsonarr number length: length of a directory entry in bytes (only for file entries) - for getting information about the content MIME type + :>jsonarr number length: length of a directory entry in bytes (only for file + entries) for getting information about the content MIME type :>jsonarr string name: the directory entry name :>jsonarr number perms: permissions for the directory entry :>jsonarr string target: **sha1_git** identifier of the directory entry - :>jsonarr string target_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` - or :http:get:`/api/1/directory/(sha1_git)/[(path)/]` depending on the directory entry type - :>jsonarr string type: the type of the directory entry, can be either ``dir``, ``file`` or ``rev`` + :>jsonarr string target_url: link to + :http:get:`/api/1/content/[(hash_type):](hash)/` + or :http:get:`/api/1/directory/(sha1_git)/[(path)/]` + depending on the directory entry type + :>jsonarr string type: the type of the directory entry, can be either + ``dir``, ``file`` or ``rev`` :statuscode 200: no error :statuscode 400: an invalid **hash_type** or **hash** has been provided @@ -58,19 +69,25 @@ .. parsed-literal:: :swh_web_api:`directory/977fc4b98c0e85816348cebd3b12026407c368b6/` - """ # noqa + """ if path: - error_msg_path = ('Entry with path %s relative to directory ' - 'with sha1_git %s not found.') % (path, sha1_git) + error_msg_path = ( + "Entry with path %s relative to directory " "with sha1_git %s not found." + ) % (path, sha1_git) return api_lookup( - service.lookup_directory_with_path, sha1_git, path, + service.lookup_directory_with_path, + sha1_git, + path, notfound_msg=error_msg_path, enrich_fn=utils.enrich_directory, - request=request) + request=request, + ) else: - error_msg_nopath = 'Directory with sha1_git %s not found.' % sha1_git + error_msg_nopath = "Directory with sha1_git %s not found." 
% sha1_git
         return api_lookup(
-            service.lookup_directory, sha1_git,
+            service.lookup_directory,
+            sha1_git,
             notfound_msg=error_msg_nopath,
             enrich_fn=utils.enrich_directory,
-            request=request)
+            request=request,
+        )
diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py
--- a/swh/web/api/views/identifiers.py
+++ b/swh/web/api/views/identifiers.py
@@ -8,14 +8,14 @@
 from swh.web.common import service
 from swh.web.common.exc import LargePayloadExc
 from swh.web.common.identifiers import (
-    resolve_swh_persistent_id, get_persistent_identifier,
-    group_swh_persistent_identifiers
+    resolve_swh_persistent_id,
+    get_persistent_identifier,
+    group_swh_persistent_identifiers,
 )


-@api_route(r'/resolve/(?P<swh_id>.*)/',
-           'api-1-resolve-swh-pid')
-@api_doc('/resolve/')
+@api_route(r"/resolve/(?P<swh_id>.*)/", "api-1-resolve-swh-pid")
+@api_doc("/resolve/")
 @format_docstring()
 def api_resolve_swh_pid(request, swh_id):
     """
@@ -57,20 +57,20 @@
     # id is well-formed, now check that the pointed
     # object is present in the archive, NotFoundExc
     # will be raised otherwise
-    swh_id_parsed = swh_id_resolved['swh_id_parsed']
+    swh_id_parsed = swh_id_resolved["swh_id_parsed"]
     object_type = swh_id_parsed.object_type
     object_id = swh_id_parsed.object_id
     service.lookup_object(object_type, object_id)
     # id is well-formed and the pointed object exists
     swh_id_data = swh_id_parsed._asdict()
-    swh_id_data['browse_url'] = request.build_absolute_uri(
-        swh_id_resolved['browse_url'])
+    swh_id_data["browse_url"] = request.build_absolute_uri(
+        swh_id_resolved["browse_url"]
+    )
     return swh_id_data


-@api_route(r'/known/',
-           'api-1-known', methods=['POST'])
-@api_doc('/known/')
+@api_route(r"/known/", "api-1-known", methods=["POST"])
+@api_doc("/known/")
 @format_docstring()
 def api_swh_pid_known(request):
     """
@@ -100,13 +100,13 @@
     """
     limit = 1000
     if len(request.data) > limit:
-        raise LargePayloadExc('The maximum number of PIDs this endpoint can '
-                              'receive is %s' % limit)
+        raise LargePayloadExc(
+            "The maximum number of PIDs this endpoint can " "receive is %s" % limit
+        )

-    persistent_ids = [get_persistent_identifier(pid)
-                      for pid in request.data]
+    persistent_ids = [get_persistent_identifier(pid) for pid in request.data]

-    response = {str(pid): {'known': False} for pid in persistent_ids}
+    response = {str(pid): {"known": False} for pid in persistent_ids}

     # group pids by their type
     pids_by_type = group_swh_persistent_identifiers(persistent_ids)
@@ -115,6 +115,6 @@

     for pid in persistent_ids:
         if pid.object_id not in missing_hashes:
-            response[str(pid)]['known'] = True
+            response[str(pid)]["known"] = True

     return response
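`api_swh_pid_known` above takes a JSON list of persistent identifiers (at most 1000, per the LargePayloadExc guard) and maps each one to `{"known": bool}`. A hedged client sketch; the base URL and the exact `swh:1:cnt:` identifier are illustrative, though the form follows the persistent-identifier syntax handled by `get_persistent_identifier`:

import requests

BASE = "https://archive.softwareheritage.org/api/1"  # assumed deployment
# At most 1000 identifiers per call, per the guard in the view above.
pids = ["swh:1:cnt:dc2830a9e72f23c1dfebef4413003221baa5fb62"]

res = requests.post("%s/known/" % BASE, json=pids, timeout=30)
# The view maps each submitted identifier to {"known": bool}.
for pid, status in res.json().items():
    print(pid, status["known"])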
diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -16,16 +16,15 @@
 from swh.web.api.views.utils import api_lookup


-DOC_RETURN_ORIGIN = '''
+DOC_RETURN_ORIGIN = """
        :>json string origin_visits_url: link to
            :http:get:`/api/1/origin/(origin_url)/visits/`
            in order to get information about the visits for that origin
        :>json string url: the origin canonical url
-'''
+"""

-DOC_RETURN_ORIGIN_ARRAY = \
-    DOC_RETURN_ORIGIN.replace(':>json', ':>jsonarr')
+DOC_RETURN_ORIGIN_ARRAY = DOC_RETURN_ORIGIN.replace(":>json", ":>jsonarr")

-DOC_RETURN_ORIGIN_VISIT = '''
+DOC_RETURN_ORIGIN_VISIT = """
        :>json string date: ISO representation of the visit date (in UTC)
        :>json str origin: the origin canonical url
        :>json string origin_url: link to get information about the origin
@@ -38,21 +37,20 @@
        :>json string status: status of the visit (either **full**,
            **partial** or **ongoing**)
        :>json number visit: the unique identifier of the visit
-'''
+"""

-DOC_RETURN_ORIGIN_VISIT_ARRAY = \
-    DOC_RETURN_ORIGIN_VISIT.replace(':>json', ':>jsonarr')
+DOC_RETURN_ORIGIN_VISIT_ARRAY = DOC_RETURN_ORIGIN_VISIT.replace(":>json", ":>jsonarr")

-DOC_RETURN_ORIGIN_VISIT_ARRAY += '''
+DOC_RETURN_ORIGIN_VISIT_ARRAY += """
        :>jsonarr number id: the unique identifier of the origin
        :>jsonarr string origin_visit_url: link to
            :http:get:`/api/1/origin/(origin_url)/visit/(visit_id)/`
            in order to get information about the visit
-'''
+"""


-@api_route(r'/origins/', 'api-1-origins')
-@api_doc('/origins/', noargs=True)
+@api_route(r"/origins/", "api-1-origins")
+@api_doc("/origins/", noargs=True)
 @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY)
 def api_origins(request):
     """
@@ -83,29 +81,32 @@
        :swh_web_api:`origins?origin_count=500`

     """
-    origin_from = int(request.query_params.get('origin_from', '1'))
-    origin_count = int(request.query_params.get('origin_count', '100'))
+    origin_from = int(request.query_params.get("origin_from", "1"))
+    origin_count = int(request.query_params.get("origin_count", "100"))
     origin_count = min(origin_count, 10000)
     results = api_lookup(
-        service.lookup_origins, origin_from, origin_count+1,
+        service.lookup_origins,
+        origin_from,
+        origin_count + 1,
         enrich_fn=enrich_origin,
-        request=request)
-    response = {'results': results, 'headers': {}}
+        request=request,
+    )
+    response = {"results": results, "headers": {}}
     if len(results) > origin_count:
-        origin_from = results.pop()['id']
-        response['headers']['link-next'] = reverse(
-            'api-1-origins',
-            query_params={'origin_from': origin_from,
-                          'origin_count': origin_count},
-            request=request)
+        origin_from = results.pop()["id"]
+        response["headers"]["link-next"] = reverse(
+            "api-1-origins",
+            query_params={"origin_from": origin_from, "origin_count": origin_count},
+            request=request,
+        )
     for result in results:
-        if 'id' in result:
-            del result['id']
+        if "id" in result:
+            del result["id"]
     return response


-@api_route(r'/origin/(?P<origin_url>.+)/get/', 'api-1-origin')
-@api_doc('/origin/')
+@api_route(r"/origin/(?P<origin_url>.+)/get/", "api-1-origin")
+@api_doc("/origin/")
 @format_docstring(return_origin=DOC_RETURN_ORIGIN)
 def api_origin(request, origin_url):
     """
@@ -129,23 +130,25 @@
        :swh_web_api:`origin/https://github.com/python/cpython/get/`

     """
-    ori_dict = {
-        'url': origin_url
-    }
+    ori_dict = {"url": origin_url}

-    error_msg = 'Origin with url %s not found.' % ori_dict['url']
% ori_dict["url"] return api_lookup( - service.lookup_origin, ori_dict, + service.lookup_origin, + ori_dict, notfound_msg=error_msg, enrich_fn=enrich_origin, - request=request) + request=request, + ) -@api_route(r'/origin/search/(?P.+)/', - 'api-1-origin-search', - throttle_scope='swh_api_origin_search') -@api_doc('/origin/search/') +@api_route( + r"/origin/search/(?P.+)/", + "api-1-origin-search", + throttle_scope="swh_api_origin_search", +) +@api_doc("/origin/search/") @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_search(request, url_pattern): """ @@ -182,37 +185,41 @@ :swh_web_api:`origin/search/python/?limit=2` """ result = {} - limit = min(int(request.query_params.get('limit', '70')), 1000) - page_token = request.query_params.get('page_token') - with_visit = request.query_params.get('with_visit', 'false') + limit = min(int(request.query_params.get("limit", "70")), 1000) + page_token = request.query_params.get("page_token") + with_visit = request.query_params.get("with_visit", "false") (results, page_token) = api_lookup( - service.search_origin, url_pattern, limit, - bool(strtobool(with_visit)), page_token, - enrich_fn=enrich_origin, request=request) + service.search_origin, + url_pattern, + limit, + bool(strtobool(with_visit)), + page_token, + enrich_fn=enrich_origin, + request=request, + ) if page_token is not None: query_params = {} - query_params['limit'] = limit - query_params['page_token'] = page_token - - result['headers'] = { - 'link-next': reverse('api-1-origin-search', - url_args={'url_pattern': url_pattern}, - query_params=query_params, - request=request) + query_params["limit"] = limit + query_params["page_token"] = page_token + + result["headers"] = { + "link-next": reverse( + "api-1-origin-search", + url_args={"url_pattern": url_pattern}, + query_params=query_params, + request=request, + ) } - result.update({ - 'results': results - }) + result.update({"results": results}) return result -@api_route(r'/origin/metadata-search/', - 'api-1-origin-metadata-search') -@api_doc('/origin/metadata-search/', noargs=True, need_params=True) +@api_route(r"/origin/metadata-search/", "api-1-origin-metadata-search") +@api_doc("/origin/metadata-search/", noargs=True, need_params=True) @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY) def api_origin_metadata_search(request): """ @@ -240,25 +247,25 @@ :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe` """ - fulltext = request.query_params.get('fulltext', None) - limit = min(int(request.query_params.get('limit', '70')), 100) + fulltext = request.query_params.get("fulltext", None) + limit = min(int(request.query_params.get("limit", "70")), 100) if not fulltext: content = '"fulltext" must be provided and non-empty.' raise BadInputExc(content) - results = api_lookup(service.search_origin_metadata, fulltext, limit, - request=request) + results = api_lookup( + service.search_origin_metadata, fulltext, limit, request=request + ) return { - 'results': results, + "results": results, } -@api_route(r'/origin/(?P.*)/visits/', 'api-1-origin-visits') -@api_doc('/origin/visits/') -@format_docstring( - return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY) +@api_route(r"/origin/(?P.*)/visits/", "api-1-origin-visits") +@api_doc("/origin/visits/") +@format_docstring(return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY) def api_origin_visits(request, origin_url): """ .. 
-@api_route(r'/origin/metadata-search/',
-           'api-1-origin-metadata-search')
-@api_doc('/origin/metadata-search/', noargs=True, need_params=True)
+@api_route(r"/origin/metadata-search/", "api-1-origin-metadata-search")
+@api_doc("/origin/metadata-search/", noargs=True, need_params=True)
 @format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY)
 def api_origin_metadata_search(request):
     """
@@ -240,25 +247,25 @@
        :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`

     """
-    fulltext = request.query_params.get('fulltext', None)
-    limit = min(int(request.query_params.get('limit', '70')), 100)
+    fulltext = request.query_params.get("fulltext", None)
+    limit = min(int(request.query_params.get("limit", "70")), 100)

     if not fulltext:
         content = '"fulltext" must be provided and non-empty.'
         raise BadInputExc(content)

-    results = api_lookup(service.search_origin_metadata, fulltext, limit,
-                         request=request)
+    results = api_lookup(
+        service.search_origin_metadata, fulltext, limit, request=request
+    )

     return {
-        'results': results,
+        "results": results,
     }


-@api_route(r'/origin/(?P<origin_url>.*)/visits/', 'api-1-origin-visits')
-@api_doc('/origin/visits/')
-@format_docstring(
-    return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY)
+@api_route(r"/origin/(?P<origin_url>.*)/visits/", "api-1-origin-visits")
+@api_doc("/origin/visits/")
+@format_docstring(return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY)
 def api_origin_visits(request, origin_url):
     """
     .. http:get:: /api/1/origin/(origin_url)/visits/
@@ -289,16 +296,15 @@

     """
     result = {}
-    origin_query = {'url': origin_url}
-    notfound_msg = 'No origin {} found'.format(origin_url)
-    url_args_next = {'origin_url': origin_url}
-    per_page = int(request.query_params.get('per_page', '10'))
-    last_visit = request.query_params.get('last_visit')
+    origin_query = {"url": origin_url}
+    notfound_msg = "No origin {} found".format(origin_url)
+    url_args_next = {"origin_url": origin_url}
+    per_page = int(request.query_params.get("per_page", "10"))
+    last_visit = request.query_params.get("last_visit")
     if last_visit:
         last_visit = int(last_visit)

-    def _lookup_origin_visits(
-            origin_query, last_visit=last_visit, per_page=per_page):
+    def _lookup_origin_visits(origin_query, last_visit=last_visit, per_page=per_page):
         all_visits = get_origin_visits(origin_query)
         all_visits.reverse()
         visits = []
@@ -306,47 +312,52 @@
             visits = all_visits[:per_page]
         else:
             for i, v in enumerate(all_visits):
-                if v['visit'] == last_visit:
-                    visits = all_visits[i+1:i+1+per_page]
+                if v["visit"] == last_visit:
+                    visits = all_visits[i + 1 : i + 1 + per_page]
                     break
         for v in visits:
             yield v

-    results = api_lookup(_lookup_origin_visits, origin_query,
-                         notfound_msg=notfound_msg,
-                         enrich_fn=partial(enrich_origin_visit,
-                                           with_origin_link=False,
-                                           with_origin_visit_link=True),
-                         request=request)
+    results = api_lookup(
+        _lookup_origin_visits,
+        origin_query,
+        notfound_msg=notfound_msg,
+        enrich_fn=partial(
+            enrich_origin_visit, with_origin_link=False, with_origin_visit_link=True
+        ),
+        request=request,
+    )

     if results:
         nb_results = len(results)
         if nb_results == per_page:
-            new_last_visit = results[-1]['visit']
+            new_last_visit = results[-1]["visit"]
             query_params = {}
-            query_params['last_visit'] = new_last_visit
-
-            if request.query_params.get('per_page'):
-                query_params['per_page'] = per_page
-
-            result['headers'] = {
-                'link-next': reverse('api-1-origin-visits',
-                                     url_args=url_args_next,
-                                     query_params=query_params,
-                                     request=request)
+            query_params["last_visit"] = new_last_visit
+
+            if request.query_params.get("per_page"):
+                query_params["per_page"] = per_page
+
+            result["headers"] = {
+                "link-next": reverse(
+                    "api-1-origin-visits",
+                    url_args=url_args_next,
+                    query_params=query_params,
+                    request=request,
+                )
             }

-    result.update({
-        'results': results
-    })
+    result.update({"results": results})

     return result


-@api_route(r'/origin/(?P<origin_url>.*)/visit/latest/',
-           'api-1-origin-visit-latest',
-           throttle_scope='swh_api_origin_visit_latest')
-@api_doc('/origin/visit/latest/')
+@api_route(
+    r"/origin/(?P<origin_url>.*)/visit/latest/",
+    "api-1-origin-visit-latest",
+    throttle_scope="swh_api_origin_visit_latest",
+)
+@api_doc("/origin/visit/latest/")
 @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT)
 def api_origin_visit_latest(request, origin_url=None):
     """
@@ -372,21 +383,23 @@
        :swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/`

     """
-    require_snapshot = request.query_params.get('require_snapshot', 'false')
+    require_snapshot = request.query_params.get("require_snapshot", "false")
     return api_lookup(
-        service.lookup_origin_visit_latest, origin_url,
+        service.lookup_origin_visit_latest,
+        origin_url,
         bool(strtobool(require_snapshot)),
-        notfound_msg=('No visit for origin {} found'
-                      .format(origin_url)),
-        enrich_fn=partial(enrich_origin_visit,
-                          with_origin_link=True,
-                          with_origin_visit_link=False),
-        request=request)
-
-
-@api_route(r'/origin/(?P<origin_url>.*)/visit/(?P<visit_id>[0-9]+)/',
-           'api-1-origin-visit')
-@api_doc('/origin/visit/')
notfound_msg=("No visit for origin {} found".format(origin_url)), + enrich_fn=partial( + enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False + ), + request=request, + ) + + +@api_route( + r"/origin/(?P.*)/visit/(?P[0-9]+)/", "api-1-origin-visit" +) +@api_doc("/origin/visit/") @format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT) def api_origin_visit(request, visit_id, origin_url): """ @@ -412,18 +425,21 @@ :swh_web_api:`origin/https://github.com/hylang/hy/visit/1/` """ return api_lookup( - service.lookup_origin_visit, origin_url, int(visit_id), - notfound_msg=('No visit {} for origin {} found' - .format(visit_id, origin_url)), - enrich_fn=partial(enrich_origin_visit, - with_origin_link=True, - with_origin_visit_link=False), - request=request) - - -@api_route(r'/origin/(?P.+)' - '/intrinsic-metadata', 'api-origin-intrinsic-metadata') -@api_doc('/origin/intrinsic-metadata/') + service.lookup_origin_visit, + origin_url, + int(visit_id), + notfound_msg=("No visit {} for origin {} found".format(visit_id, origin_url)), + enrich_fn=partial( + enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False + ), + request=request, + ) + + +@api_route( + r"/origin/(?P.+)" "/intrinsic-metadata", "api-origin-intrinsic-metadata" +) +@api_doc("/origin/intrinsic-metadata/") @format_docstring() def api_origin_intrinsic_metadata(request, origin_url): """ @@ -445,15 +461,15 @@ .. parsed-literal:: :swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata` - """ # noqa - ori_dict = { - 'url': origin_url - } + """ + ori_dict = {"url": origin_url} - error_msg = 'Origin with url %s not found' % ori_dict['url'] + error_msg = "Origin with url %s not found" % ori_dict["url"] return api_lookup( - service.lookup_origin_intrinsic_metadata, ori_dict, + service.lookup_origin_intrinsic_metadata, + ori_dict, notfound_msg=error_msg, enrich_fn=enrich_origin, - request=request) + request=request, + ) diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py --- a/swh/web/api/views/origin_save.py +++ b/swh/web/api/views/origin_save.py @@ -8,15 +8,19 @@ from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route from swh.web.common.origin_save import ( - create_save_origin_request, get_save_origin_requests + create_save_origin_request, + get_save_origin_requests, ) -@api_route(r'/origin/save/(?P.+)/url/(?P.+)/', - 'api-1-save-origin', methods=['GET', 'POST'], - throttle_scope='swh_save_origin') +@api_route( + r"/origin/save/(?P.+)/url/(?P.+)/", + "api-1-save-origin", + methods=["GET", "POST"], + throttle_scope="swh_save_origin", +) @never_cache -@api_doc('/origin/save/') +@api_doc("/origin/save/") @format_docstring() def api_save_origin(request, visit_type, origin_url): """ @@ -74,12 +78,12 @@ :statuscode 404: no save requests have been found for a given origin """ - if request.method == 'POST': + if request.method == "POST": sor = create_save_origin_request(visit_type, origin_url) - del sor['id'] + del sor["id"] else: sor = get_save_origin_requests(visit_type, origin_url) for s in sor: - del s['id'] + del s["id"] return sor diff --git a/swh/web/api/views/ping.py b/swh/web/api/views/ping.py --- a/swh/web/api/views/ping.py +++ b/swh/web/api/views/ping.py @@ -7,8 +7,8 @@ from swh.web.api.apiurls import api_route -@api_route(r'/ping/', 'api-1-ping') -@api_doc('/ping/', noargs=True) +@api_route(r"/ping/", "api-1-ping") +@api_doc("/ping/", noargs=True) def ping(request): """ .. 
diff --git a/swh/web/api/views/ping.py b/swh/web/api/views/ping.py
--- a/swh/web/api/views/ping.py
+++ b/swh/web/api/views/ping.py
@@ -7,8 +7,8 @@
 from swh.web.api.apiurls import api_route


-@api_route(r'/ping/', 'api-1-ping')
-@api_doc('/ping/', noargs=True)
+@api_route(r"/ping/", "api-1-ping")
+@api_doc("/ping/", noargs=True)
 def ping(request):
     """
     .. http:get:: /api/1/ping/
@@ -18,4 +18,4 @@

     :statuscode 200: no error
     """
-    return 'pong'
+    return "pong"
diff --git a/swh/web/api/views/release.py b/swh/web/api/views/release.py
--- a/swh/web/api/views/release.py
+++ b/swh/web/api/views/release.py
@@ -10,9 +10,10 @@
 from swh.web.api.views.utils import api_lookup


-@api_route(r'/release/(?P<sha1_git>[0-9a-f]+)/', 'api-1-release',
-           checksum_args=['sha1_git'])
-@api_doc('/release/')
+@api_route(
+    r"/release/(?P<sha1_git>[0-9a-f]+)/", "api-1-release", checksum_args=["sha1_git"]
+)
+@api_doc("/release/")
 @format_docstring()
 def api_release(request, sha1_git):
     """
@@ -49,9 +50,11 @@
        :swh_web_api:`release/208f61cc7a5dbc9879ae6e5c2f95891e270f09ef/`

     """
-    error_msg = 'Release with sha1_git %s not found.' % sha1_git
+    error_msg = "Release with sha1_git %s not found." % sha1_git
     return api_lookup(
-        service.lookup_release, sha1_git,
+        service.lookup_release,
+        sha1_git,
         notfound_msg=error_msg,
         enrich_fn=utils.enrich_release,
-        request=request)
+        request=request,
+    )
diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py
--- a/swh/web/api/views/revision.py
+++ b/swh/web/api/views/revision.py
@@ -13,7 +13,7 @@
 from swh.web.api.views.utils import api_lookup


-DOC_RETURN_REVISION = '''
+DOC_RETURN_REVISION = """
        :>json object author: information about the author of the revision
        :>json object committer: information about the committer of the revision
@@ -34,14 +34,12 @@
            :http:get:`/api/1/revision/(sha1_git)/` to get more information
            about it
        :>json string type: the type of the revision
-''' # noqa
+"""

-DOC_RETURN_REVISION_ARRAY = \
-    DOC_RETURN_REVISION.replace(':>json', ':>jsonarr')
+DOC_RETURN_REVISION_ARRAY = DOC_RETURN_REVISION.replace(":>json", ":>jsonarr")


-def _revision_directory_by(revision, path, request_path,
-                           limit=100, with_data=False):
+def _revision_directory_by(revision, path, request_path, limit=100, with_data=False):
     """
     Compute the revision matching criterion's directory or content data.

@@ -56,26 +54,29 @@
         to a content.

     """
+
     def enrich_directory_local(dir, context_url=request_path):
         return utils.enrich_directory(dir, context_url)

     rev_id, result = service.lookup_directory_through_revision(
-        revision, path, limit=limit, with_data=with_data)
+        revision, path, limit=limit, with_data=with_data
+    )

-    content = result['content']
-    if result['type'] == 'dir':  # dir_entries
-        result['content'] = list(map(enrich_directory_local, content))
-    elif result['type'] == 'file':  # content
-        result['content'] = utils.enrich_content(content)
-    elif result['type'] == 'rev':  # revision
-        result['content'] = utils.enrich_revision(content)
+    content = result["content"]
+    if result["type"] == "dir":  # dir_entries
+        result["content"] = list(map(enrich_directory_local, content))
+    elif result["type"] == "file":  # content
+        result["content"] = utils.enrich_content(content)
+    elif result["type"] == "rev":  # revision
+        result["content"] = utils.enrich_revision(content)

     return result


-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/', 'api-1-revision',
-           checksum_args=['sha1_git'])
-@api_doc('/revision/')
+@api_route(
+    r"/revision/(?P<sha1_git>[0-9a-f]+)/", "api-1-revision", checksum_args=["sha1_git"]
+)
+@api_doc("/revision/")
 @format_docstring(return_revision=DOC_RETURN_REVISION)
 def api_revision(request, sha1_git):
     """
@@ -103,37 +104,44 @@
        .. parsed-literal::

           :swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/`
-    """ # noqa
+    """
     return api_lookup(
-        service.lookup_revision, sha1_git,
-        notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git),
+        service.lookup_revision,
+        sha1_git,
+        notfound_msg="Revision with sha1_git {} not found.".format(sha1_git),
         enrich_fn=utils.enrich_revision,
-        request=request)
+        request=request,
+    )


-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/raw/',
-           'api-1-revision-raw-message', checksum_args=['sha1_git'])
-@api_doc('/revision/raw/', tags=['hidden'], handle_response=True)
+@api_route(
+    r"/revision/(?P<sha1_git>[0-9a-f]+)/raw/",
+    "api-1-revision-raw-message",
+    checksum_args=["sha1_git"],
+)
+@api_doc("/revision/raw/", tags=["hidden"], handle_response=True)
 def api_revision_raw_message(request, sha1_git):
     """Return the raw data of the message of revision identified by sha1_git
     """
     raw = service.lookup_revision_message(sha1_git)

-    response = HttpResponse(raw['message'],
-                            content_type='application/octet-stream')
-    response['Content-disposition'] = \
-        'attachment;filename=rev_%s_raw' % sha1_git
+    response = HttpResponse(raw["message"], content_type="application/octet-stream")
+    response["Content-disposition"] = "attachment;filename=rev_%s_raw" % sha1_git
     return response
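`api_revision_raw_message` above streams the undecoded commit message as `application/octet-stream` with an attachment disposition, so a client simply saves the bytes. A hedged sketch under the same base-URL assumption as the other examples:

import requests

BASE = "https://archive.softwareheritage.org/api/1"  # assumed deployment
SHA1_GIT = "aafb16d69fd30ff58afdd69036a26047f3aebdc6"

res = requests.get("%s/revision/%s/raw/" % (BASE, SHA1_GIT), timeout=30)
# Mirror the filename suggested by the Content-disposition header above.
with open("rev_%s_raw" % SHA1_GIT, "wb") as f:
    f.write(res.content)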
-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/directory/',
-           'api-1-revision-directory', checksum_args=['sha1_git'])
-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/directory/(?P<dir_path>.+)/',
-           'api-1-revision-directory', checksum_args=['sha1_git'])
-@api_doc('/revision/directory/')
+@api_route(
+    r"/revision/(?P<sha1_git>[0-9a-f]+)/directory/",
+    "api-1-revision-directory",
+    checksum_args=["sha1_git"],
+)
+@api_route(
+    r"/revision/(?P<sha1_git>[0-9a-f]+)/directory/(?P<dir_path>.+)/",
+    "api-1-revision-directory",
+    checksum_args=["sha1_git"],
+)
+@api_doc("/revision/directory/")
 @format_docstring()
-def api_revision_directory(request, sha1_git,
-                           dir_path=None,
-                           with_data=False):
+def api_revision_directory(request, sha1_git, dir_path=None, with_data=False):
     """
     .. http:get:: /api/1/revision/(sha1_git)/directory/[(path)/]
@@ -142,13 +150,15 @@

         This endpoint behaves like
         :http:get:`/api/1/directory/(sha1_git)/[(path)/]`,
         but operates on the root directory associated to a given revision.

-        :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier
-        :param string path: optional parameter to get information about the directory entry
-            pointed by that relative path
+        :param string sha1_git: hexadecimal representation of the revision
+            **sha1_git** identifier
+        :param string path: optional parameter to get information about the
+            directory entry pointed by that relative path

         {common_headers}

-        :>json array content: directory entries as returned by :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
+        :>json array content: directory entries as returned by
+            :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
         :>json string path: path of directory from the revision root one
         :>json string revision: the unique revision identifier
         :>json string type: the type of the directory
@@ -162,30 +172,39 @@

        .. parsed-literal::

           :swh_web_api:`revision/f1b94134a4b879bc55c3dacdb496690c8ebdc03f/directory/`
-    """ # noqa
-    return _revision_directory_by({'sha1_git': sha1_git},
-                                  dir_path, request.path,
-                                  with_data=with_data)
-
-
-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/log/', 'api-1-revision-log',
-           checksum_args=['sha1_git'])
-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)'
-           r'/prev/(?P<prev_sha1s>[0-9a-f]*/*)/log/',
-           'api-1-revision-log', checksum_args=['sha1_git', 'prev_sha1s'])
-@api_doc('/revision/log/')
+    """
+    return _revision_directory_by(
+        {"sha1_git": sha1_git}, dir_path, request.path, with_data=with_data
+    )
+
+
+@api_route(
+    r"/revision/(?P<sha1_git>[0-9a-f]+)/log/",
+    "api-1-revision-log",
+    checksum_args=["sha1_git"],
+)
+@api_route(
+    r"/revision/(?P<sha1_git>[0-9a-f]+)" r"/prev/(?P<prev_sha1s>[0-9a-f]*/*)/log/",
+    "api-1-revision-log",
+    checksum_args=["sha1_git", "prev_sha1s"],
+)
+@api_doc("/revision/log/")
 @format_docstring(return_revision_array=DOC_RETURN_REVISION_ARRAY)
 def api_revision_log(request, sha1_git, prev_sha1s=None):
     """
     .. http:get:: /api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/

-        Get a list of all revisions heading to a given one, in other words show the commit log.
+        Get a list of all revisions heading to a given one, in other words show
+        the commit log.

-        :param string sha1_git: hexadecimal representation of the revision **sha1_git** identifier
-        :param string prev_sha1s: optional parameter representing the navigation breadcrumbs
-            (descendant revisions previously visited). If multiple values, use / as delimiter.
-            If provided, revisions information will be added at the beginning of the returned list.
-        :query int per_page: number of elements in the returned list, for pagination purpose
+        :param string sha1_git: hexadecimal representation of the revision
+            **sha1_git** identifier
+        :param string prev_sha1s: optional parameter representing the navigation
+            breadcrumbs (descendant revisions previously visited). If multiple values,
+            use / as delimiter. If provided, revisions information will be added at
+            the beginning of the returned list.
+        :query int per_page: number of elements in the returned list, for pagination
+            purpose

         {common_headers}
         {resheader_link}

@@ -201,33 +220,38 @@

        .. parsed-literal::

           :swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/`
-    """ # noqa
+    """
     result = {}
-    per_page = int(request.query_params.get('per_page', '10'))
+    per_page = int(request.query_params.get("per_page", "10"))

-    def lookup_revision_log_with_limit(s, limit=per_page+1):
+    def lookup_revision_log_with_limit(s, limit=per_page + 1):
         return service.lookup_revision_log(s, limit)

-    error_msg = 'Revision with sha1_git %s not found.' % sha1_git
-    rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git,
-                         notfound_msg=error_msg,
-                         enrich_fn=utils.enrich_revision,
-                         request=request)
+    error_msg = "Revision with sha1_git %s not found." % sha1_git
+    rev_get = api_lookup(
+        lookup_revision_log_with_limit,
+        sha1_git,
+        notfound_msg=error_msg,
+        enrich_fn=utils.enrich_revision,
+        request=request,
+    )

     nb_rev = len(rev_get)
-    if nb_rev == per_page+1:
+    if nb_rev == per_page + 1:
         rev_backward = rev_get[:-1]
-        new_last_sha1 = rev_get[-1]['id']
+        new_last_sha1 = rev_get[-1]["id"]
         query_params = {}
-        if request.query_params.get('per_page'):
-            query_params['per_page'] = per_page
+        if request.query_params.get("per_page"):
+            query_params["per_page"] = per_page

-        result['headers'] = {
-            'link-next': reverse('api-1-revision-log',
-                                 url_args={'sha1_git': new_last_sha1},
-                                 query_params=query_params,
-                                 request=request)
+        result["headers"] = {
+            "link-next": reverse(
+                "api-1-revision-log",
+                url_args={"sha1_git": new_last_sha1},
+                query_params=query_params,
+                request=request,
+            )
         }

     else:
@@ -237,15 +261,15 @@
         revisions = rev_backward

     else:
-        rev_forward_ids = prev_sha1s.split('/')
+        rev_forward_ids = prev_sha1s.split("/")
         rev_forward = api_lookup(
-            service.lookup_revision_multiple, rev_forward_ids,
+            service.lookup_revision_multiple,
+            rev_forward_ids,
             notfound_msg=error_msg,
             enrich_fn=utils.enrich_revision,
-            request=request)
+            request=request,
+        )
         revisions = rev_forward + rev_backward

-    result.update({
-        'results': revisions
-    })
+    result.update({"results": revisions})
     return result
diff --git a/swh/web/api/views/snapshot.py b/swh/web/api/views/snapshot.py
--- a/swh/web/api/views/snapshot.py
+++ b/swh/web/api/views/snapshot.py
@@ -12,9 +12,12 @@
 from swh.web.api.views.utils import api_lookup


-@api_route(r'/snapshot/(?P<snapshot_id>[0-9a-f]+)/', 'api-1-snapshot',
-           checksum_args=['snapshot_id'])
-@api_doc('/snapshot/')
+@api_route(
+    r"/snapshot/(?P<snapshot_id>[0-9a-f]+)/",
+    "api-1-snapshot",
+    checksum_args=["snapshot_id"],
+)
+@api_doc("/snapshot/")
 @format_docstring()
 def api_snapshot(request, snapshot_id):
     """
@@ -65,30 +68,36 @@
        :swh_web_api:`snapshot/6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a/`

     """
-    snapshot_content_max_size = get_config()['snapshot_content_max_size']
+    snapshot_content_max_size = get_config()["snapshot_content_max_size"]

-    branches_from = request.GET.get('branches_from', '')
-    branches_count = int(request.GET.get('branches_count',
-                                         snapshot_content_max_size))
-    target_types = request.GET.get('target_types', None)
-    target_types = target_types.split(',') if target_types else None
+    branches_from = request.GET.get("branches_from", "")
+    branches_count = int(request.GET.get("branches_count", snapshot_content_max_size))
+    target_types = request.GET.get("target_types", None)
+    target_types = target_types.split(",") if target_types else None

     results = api_lookup(
-        service.lookup_snapshot, snapshot_id, branches_from,
-        branches_count, target_types,
-        notfound_msg='Snapshot with id {} not found.'.format(snapshot_id),
+        service.lookup_snapshot,
+        snapshot_id,
+        branches_from,
+        branches_count,
+        target_types,
+        notfound_msg="Snapshot with id {} not found.".format(snapshot_id),
         enrich_fn=enrich_snapshot,
-        request=request)
-
-    response = {'results': results, 'headers': {}}
-
-    if results['next_branch'] is not None:
-        response['headers']['link-next'] = reverse(
-            'api-1-snapshot',
-            url_args={'snapshot_id': snapshot_id},
-            query_params={'branches_from': results['next_branch'],
-                          'branches_count': branches_count,
-                          'target_types': target_types},
-            request=request)
+        request=request,
+    )
+
+    response = {"results": results, "headers": {}}
+
+    if results["next_branch"] is not None:
+        response["headers"]["link-next"] = reverse(
+            "api-1-snapshot",
+
url_args={"snapshot_id": snapshot_id}, + query_params={ + "branches_from": results["next_branch"], + "branches_count": branches_count, + "target_types": target_types, + }, + request=request, + ) return response diff --git a/swh/web/api/views/stat.py b/swh/web/api/views/stat.py --- a/swh/web/api/views/stat.py +++ b/swh/web/api/views/stat.py @@ -8,8 +8,8 @@ from swh.web.api.apiurls import api_route -@api_route(r'/stat/counters/', 'api-1-stat-counters') -@api_doc('/stat/counters/', noargs=True) +@api_route(r"/stat/counters/", "api-1-stat-counters") +@api_doc("/stat/counters/", noargs=True) @format_docstring() def api_stats(request): """ diff --git a/swh/web/api/views/utils.py b/swh/web/api/views/utils.py --- a/swh/web/api/views/utils.py +++ b/swh/web/api/views/utils.py @@ -17,14 +17,19 @@ class EnrichFunction(Protocol): - def __call__(self, input: Mapping[str, str], - request: Optional[HttpRequest]) -> Dict[str, str]: ... - - -def api_lookup(lookup_fn: Callable[..., Any], *args: Any, - notfound_msg: Optional[str] = 'Object not found', - enrich_fn: Optional[EnrichFunction] = None, - request: Optional[HttpRequest] = None): + def __call__( + self, input: Mapping[str, str], request: Optional[HttpRequest] + ) -> Dict[str, str]: + ... + + +def api_lookup( + lookup_fn: Callable[..., Any], + *args: Any, + notfound_msg: Optional[str] = "Object not found", + enrich_fn: Optional[EnrichFunction] = None, + request: Optional[HttpRequest] = None, +): r""" Capture a redundant behavior of: - looking up the backend with a criteria (be it an identifier or @@ -53,8 +58,12 @@ NotFoundExp or whatever `lookup_fn` raises. """ + + def _enrich_fn_noop(x, request): + return x + if enrich_fn is None: - enrich_fn = (lambda x, request: x) + enrich_fn = _enrich_fn_noop res = lookup_fn(*args) if res is None: raise NotFoundExc(notfound_msg) @@ -63,24 +72,22 @@ return enrich_fn(res, request=request) -@api_view(['GET', 'HEAD']) +@api_view(["GET", "HEAD"]) def api_home(request): - return Response({}, template_name='api/api.html') + return Response({}, template_name="api/api.html") -APIUrls.add_url_pattern(r'^$', api_home, view_name='api-1-homepage') +APIUrls.add_url_pattern(r"^$", api_home, view_name="api-1-homepage") -@api_route(r'/', 'api-1-endpoints') +@api_route(r"/", "api-1-endpoints") def api_endpoints(request): """Display the list of opened api endpoints. """ routes = APIUrls.get_app_endpoints().copy() for route, doc in routes.items(): - doc['doc_intro'] = doc['docstring'].split('\n\n')[0] + doc["doc_intro"] = doc["docstring"].split("\n\n")[0] # Return a list of routes with consistent ordering - env = { - 'doc_routes': sorted(routes.items()) - } + env = {"doc_routes": sorted(routes.items())} return Response(env, template_name="api/endpoints.html") diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py --- a/swh/web/api/views/vault.py +++ b/swh/web/api/views/vault.py @@ -18,28 +18,36 @@ # XXX: a bit spaghetti. Would be better with class-based views. def _dispatch_cook_progress(request, obj_type, obj_id): hex_id = hashutil.hash_to_hex(obj_id) - object_name = obj_type.split('_')[0].title() - if request.method == 'GET': + object_name = obj_type.split("_")[0].title() + if request.method == "GET": return api_lookup( - service.vault_progress, obj_type, obj_id, - notfound_msg=("{} '{}' was never requested." 
- .format(object_name, hex_id)), - request=request) - elif request.method == 'POST': - email = request.POST.get('email', request.GET.get('email', None)) + service.vault_progress, + obj_type, + obj_id, + notfound_msg=("{} '{}' was never requested.".format(object_name, hex_id)), + request=request, + ) + elif request.method == "POST": + email = request.POST.get("email", request.GET.get("email", None)) return api_lookup( - service.vault_cook, obj_type, obj_id, email, - notfound_msg=("{} '{}' not found." - .format(object_name, hex_id)), - request=request) - - -@api_route(r'/vault/directory/(?P[0-9a-f]+)/', - 'api-1-vault-cook-directory', methods=['GET', 'POST'], - checksum_args=['dir_id'], - throttle_scope='swh_vault_cooking') + service.vault_cook, + obj_type, + obj_id, + email, + notfound_msg=("{} '{}' not found.".format(object_name, hex_id)), + request=request, + ) + + +@api_route( + r"/vault/directory/(?P[0-9a-f]+)/", + "api-1-vault-cook-directory", + methods=["GET", "POST"], + checksum_args=["dir_id"], + throttle_scope="swh_vault_cooking", +) @never_cache -@api_doc('/vault/directory/') +@api_doc("/vault/directory/") @format_docstring() def api_vault_cook_directory(request, dir_id): """ @@ -86,18 +94,22 @@ (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( - dir_id, ['sha1'], 'Only sha1_git is supported.') + dir_id, ["sha1"], "Only sha1_git is supported." + ) - res = _dispatch_cook_progress(request, 'directory', obj_id) - res['fetch_url'] = reverse('api-1-vault-fetch-directory', - url_args={'dir_id': dir_id}) + res = _dispatch_cook_progress(request, "directory", obj_id) + res["fetch_url"] = reverse( + "api-1-vault-fetch-directory", url_args={"dir_id": dir_id} + ) return res -@api_route(r'/vault/directory/(?P[0-9a-f]+)/raw/', - 'api-1-vault-fetch-directory', - checksum_args=['dir_id']) -@api_doc('/vault/directory/raw/', handle_response=True) +@api_route( + r"/vault/directory/(?P[0-9a-f]+)/raw/", + "api-1-vault-fetch-directory", + checksum_args=["dir_id"], +) +@api_doc("/vault/directory/raw/", handle_response=True) def api_vault_fetch_directory(request, dir_id): """ .. http:get:: /api/1/vault/directory/(dir_id)/raw/ @@ -118,23 +130,30 @@ (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( - dir_id, ['sha1'], 'Only sha1_git is supported.') + dir_id, ["sha1"], "Only sha1_git is supported." 
+ ) res = api_lookup( - service.vault_fetch, 'directory', obj_id, + service.vault_fetch, + "directory", + obj_id, notfound_msg="Directory with ID '{}' not found.".format(dir_id), - request=request) - fname = '{}.tar.gz'.format(dir_id) - response = HttpResponse(res, content_type='application/gzip') - response['Content-disposition'] = 'attachment; filename={}'.format(fname) + request=request, + ) + fname = "{}.tar.gz".format(dir_id) + response = HttpResponse(res, content_type="application/gzip") + response["Content-disposition"] = "attachment; filename={}".format(fname) return response -@api_route(r'/vault/revision/(?P[0-9a-f]+)/gitfast/', - 'api-1-vault-cook-revision_gitfast', methods=['GET', 'POST'], - checksum_args=['rev_id'], - throttle_scope='swh_vault_cooking') +@api_route( + r"/vault/revision/(?P[0-9a-f]+)/gitfast/", + "api-1-vault-cook-revision_gitfast", + methods=["GET", "POST"], + checksum_args=["rev_id"], + throttle_scope="swh_vault_cooking", +) @never_cache -@api_doc('/vault/revision/gitfast/') +@api_doc("/vault/revision/gitfast/") @format_docstring() def api_vault_cook_revision_gitfast(request, rev_id): """ @@ -182,18 +201,22 @@ (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( - rev_id, ['sha1'], 'Only sha1_git is supported.') + rev_id, ["sha1"], "Only sha1_git is supported." + ) - res = _dispatch_cook_progress(request, 'revision_gitfast', obj_id) - res['fetch_url'] = reverse('api-1-vault-fetch-revision_gitfast', - url_args={'rev_id': rev_id}) + res = _dispatch_cook_progress(request, "revision_gitfast", obj_id) + res["fetch_url"] = reverse( + "api-1-vault-fetch-revision_gitfast", url_args={"rev_id": rev_id} + ) return res -@api_route(r'/vault/revision/(?P[0-9a-f]+)/gitfast/raw/', - 'api-1-vault-fetch-revision_gitfast', - checksum_args=['rev_id']) -@api_doc('/vault/revision/gitfast/raw/', handle_response=True) +@api_route( + r"/vault/revision/(?P[0-9a-f]+)/gitfast/raw/", + "api-1-vault-fetch-revision_gitfast", + checksum_args=["rev_id"], +) +@api_doc("/vault/revision/gitfast/raw/", handle_response=True) def api_vault_fetch_revision_gitfast(request, rev_id): """ .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/ @@ -214,27 +237,34 @@ (in case of POST) """ _, obj_id = query.parse_hash_with_algorithms_or_throws( - rev_id, ['sha1'], 'Only sha1_git is supported.') + rev_id, ["sha1"], "Only sha1_git is supported." 
+ ) res = api_lookup( - service.vault_fetch, 'revision_gitfast', obj_id, + service.vault_fetch, + "revision_gitfast", + obj_id, notfound_msg="Revision with ID '{}' not found.".format(rev_id), - request=request) - fname = '{}.gitfast.gz'.format(rev_id) - response = HttpResponse(res, content_type='application/gzip') - response['Content-disposition'] = 'attachment; filename={}'.format(fname) + request=request, + ) + fname = "{}.gitfast.gz".format(rev_id) + response = HttpResponse(res, content_type="application/gzip") + response["Content-disposition"] = "attachment; filename={}".format(fname) return response -@api_route(r'/vault/revision_gitfast/(?P[0-9a-f]+)/raw/', - 'api-1-vault-revision_gitfast-raw', - checksum_args=['rev_id']) -@api_doc('/vault/revision_gitfast/raw/', tags=['hidden'], handle_response=True) +@api_route( + r"/vault/revision_gitfast/(?P[0-9a-f]+)/raw/", + "api-1-vault-revision_gitfast-raw", + checksum_args=["rev_id"], +) +@api_doc("/vault/revision_gitfast/raw/", tags=["hidden"], handle_response=True) def _api_vault_revision_gitfast_raw(request, rev_id): """ The vault backend sends an email containing an invalid url to fetch a gitfast archive. So setup a redirection to the correct one as a temporary workaround. """ - rev_gitfast_raw_url = reverse('api-1-vault-fetch-revision_gitfast', - url_args={'rev_id': rev_id}) + rev_gitfast_raw_url = reverse( + "api-1-vault-fetch-revision_gitfast", url_args={"rev_id": rev_id} + ) return redirect(rev_gitfast_raw_url) diff --git a/swh/web/auth/backends.py b/swh/web/auth/backends.py --- a/swh/web/auth/backends.py +++ b/swh/web/auth/backends.py @@ -28,22 +28,24 @@ # compute an integer user identifier for Django User model # by concatenating all groups of the UUID4 user identifier # generated by Keycloak and converting it from hex to decimal - user_id = int(''.join(decoded_token['sub'].split('-')), 16) + user_id = int("".join(decoded_token["sub"].split("-")), 16) # create a Django user that will not be saved to database - user = OIDCUser(id=user_id, - username=decoded_token['preferred_username'], - password='', - first_name=decoded_token['given_name'], - last_name=decoded_token['family_name'], - email=decoded_token['email']) + user = OIDCUser( + id=user_id, + username=decoded_token["preferred_username"], + password="", + first_name=decoded_token["given_name"], + last_name=decoded_token["family_name"], + email=decoded_token["email"], + ) # set is_staff user property based on groups - if 'groups' in decoded_token: - user.is_staff = '/staff' in decoded_token['groups'] + if "groups" in decoded_token: + user.is_staff = "/staff" in decoded_token["groups"] # add user sub to custom User proxy model - user.sub = decoded_token['sub'] + user.sub = decoded_token["sub"] return user @@ -51,20 +53,20 @@ def _oidc_user_from_profile(oidc_profile: Dict[str, Any]) -> OIDCUser: # decode JWT token - decoded_token = _oidc_client.decode_token(oidc_profile['access_token']) + decoded_token = _oidc_client.decode_token(oidc_profile["access_token"]) # create OIDCUser from decoded token user = _oidc_user_from_decoded_token(decoded_token) # get authentication init datetime - auth_datetime = datetime.fromtimestamp(decoded_token['auth_time']) - exp_datetime = datetime.fromtimestamp(decoded_token['exp']) + auth_datetime = datetime.fromtimestamp(decoded_token["auth_time"]) + exp_datetime = datetime.fromtimestamp(decoded_token["exp"]) # compute OIDC tokens expiration date - oidc_profile['expires_at'] = exp_datetime - oidc_profile['refresh_expires_at'] = ( - auth_datetime + - 
timedelta(seconds=oidc_profile['refresh_expires_in'])) + oidc_profile["expires_at"] = exp_datetime + oidc_profile["refresh_expires_at"] = auth_datetime + timedelta( + seconds=oidc_profile["refresh_expires_in"] + ) # add OIDC profile data to custom User proxy model for key, val in oidc_profile.items(): @@ -75,15 +77,16 @@ class OIDCAuthorizationCodePKCEBackend: - - def authenticate(self, request: HttpRequest, code: str, code_verifier: str, - redirect_uri: str) -> Optional[OIDCUser]: + def authenticate( + self, request: HttpRequest, code: str, code_verifier: str, redirect_uri: str + ) -> Optional[OIDCUser]: user = None try: # try to authenticate user with OIDC PKCE authorization code flow oidc_profile = _oidc_client.authorization_code( - code, redirect_uri, code_verifier=code_verifier) + code, redirect_uri, code_verifier=code_verifier + ) # create Django user user = _oidc_user_from_profile(oidc_profile) @@ -93,8 +96,7 @@ ttl = int(user.expires_at.timestamp() - timezone.now().timestamp()) # save oidc_profile in cache - cache.set(f'oidc_user_{user.id}', oidc_profile, - timeout=max(0, ttl)) + cache.set(f"oidc_user_{user.id}", oidc_profile, timeout=max(0, ttl)) except Exception as e: sentry_sdk.capture_exception(e) @@ -102,13 +104,12 @@ def get_user(self, user_id: int) -> Optional[OIDCUser]: # get oidc profile from cache - oidc_profile = cache.get(f'oidc_user_{user_id}') + oidc_profile = cache.get(f"oidc_user_{user_id}") if oidc_profile: try: user = _oidc_user_from_profile(oidc_profile) # restore auth backend - setattr(user, 'backend', - f'{__name__}.{self.__class__.__name__}') + setattr(user, "backend", f"{__name__}.{self.__class__.__name__}") return user except Exception as e: sentry_sdk.capture_exception(e) @@ -119,20 +120,19 @@ class OIDCBearerTokenAuthentication(BaseAuthentication): def authenticate(self, request): - auth_header = request.META.get('HTTP_AUTHORIZATION') + auth_header = request.META.get("HTTP_AUTHORIZATION") if auth_header is None: return None try: - auth_type, token = auth_header.split(' ', 1) + auth_type, token = auth_header.split(" ", 1) except ValueError: - raise AuthenticationFailed( - 'Invalid HTTP authorization header format') + raise AuthenticationFailed("Invalid HTTP authorization header format") - if auth_type != 'Bearer': + if auth_type != "Bearer": raise AuthenticationFailed( - (f'Invalid or unsupported HTTP authorization' - f' type ({auth_type}).')) + (f"Invalid or unsupported HTTP authorization" f" type ({auth_type}).") + ) try: # attempt to decode token decoded_token = _oidc_client.decode_token(token) diff --git a/swh/web/auth/keycloak.py b/swh/web/auth/keycloak.py --- a/swh/web/auth/keycloak.py +++ b/swh/web/auth/keycloak.py @@ -15,8 +15,13 @@ for managing authentication and user permissions with OpenID Connect. 
""" - def __init__(self, server_url: str, realm_name: str, client_id: str, - realm_public_key: str = ''): + def __init__( + self, + server_url: str, + realm_name: str, + client_id: str, + realm_public_key: str = "", + ): """ Args: server_url: URL of the Keycloak server @@ -26,9 +31,7 @@ retrieved if not provided) """ self._keycloak = KeycloakOpenID( - server_url=server_url, - client_id=client_id, - realm_name=realm_name, + server_url=server_url, client_id=client_id, realm_name=realm_name, ) self.server_url = server_url @@ -45,8 +48,7 @@ """ return self._keycloak.well_know() - def authorization_url(self, redirect_uri: str, - **extra_params: str) -> str: + def authorization_url(self, redirect_uri: str, **extra_params: str) -> str: """ Get OpenID Connect authorization URL to authenticate users. @@ -57,11 +59,12 @@ """ auth_url = self._keycloak.auth_url(redirect_uri) if extra_params: - auth_url += '&%s' % urlencode(extra_params) + auth_url += "&%s" % urlencode(extra_params) return auth_url - def authorization_code(self, code: str, redirect_uri: str, - **extra_params: str) -> Dict[str, Any]: + def authorization_code( + self, code: str, redirect_uri: str, **extra_params: str + ) -> Dict[str, Any]: """ Get OpenID Connect authentication tokens using Authorization Code flow. @@ -74,10 +77,11 @@ payload. """ return self._keycloak.token( - grant_type='authorization_code', + grant_type="authorization_code", code=code, redirect_uri=redirect_uri, - **extra_params) + **extra_params, + ) def refresh_token(self, refresh_token: str) -> Dict[str, Any]: """ @@ -91,9 +95,9 @@ """ return self._keycloak.refresh_token(refresh_token) - def decode_token(self, token: str, - options: Optional[Dict[str, Any]] = None - ) -> Dict[str, Any]: + def decode_token( + self, token: str, options: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: """ Try to decode a JWT token. @@ -106,12 +110,13 @@ """ if not self.realm_public_key: realm_public_key = self._keycloak.public_key() - self.realm_public_key = '-----BEGIN PUBLIC KEY-----\n' + self.realm_public_key = "-----BEGIN PUBLIC KEY-----\n" self.realm_public_key += realm_public_key - self.realm_public_key += '\n-----END PUBLIC KEY-----' + self.realm_public_key += "\n-----END PUBLIC KEY-----" - return self._keycloak.decode_token(token, key=self.realm_public_key, - options=options) + return self._keycloak.decode_token( + token, key=self.realm_public_key, options=options + ) def logout(self, refresh_token: str) -> None: """ @@ -140,8 +145,9 @@ _keycloak_oidc: Dict[Tuple[str, str], KeycloakOpenIDConnect] = {} -def get_keycloak_oidc_client(server_url: str, realm_name: str, - client_id: str) -> KeycloakOpenIDConnect: +def get_keycloak_oidc_client( + server_url: str, realm_name: str, client_id: str +) -> KeycloakOpenIDConnect: """ Instantiate a KeycloakOpenIDConnect class for a given client in a given realm. @@ -156,7 +162,7 @@ """ realm_client_key = (realm_name, client_id) if realm_client_key not in _keycloak_oidc: - _keycloak_oidc[realm_client_key] = KeycloakOpenIDConnect(server_url, - realm_name, - client_id) + _keycloak_oidc[realm_client_key] = KeycloakOpenIDConnect( + server_url, realm_name, client_id + ) return _keycloak_oidc[realm_client_key] diff --git a/swh/web/auth/middlewares.py b/swh/web/auth/middlewares.py --- a/swh/web/auth/middlewares.py +++ b/swh/web/auth/middlewares.py @@ -14,21 +14,22 @@ Middleware for silently refreshing on OpenID Connect session from the browser and get new access token. 
""" + def __init__(self, get_response=None): self.get_response = get_response self.exempted_urls = [ - reverse(v) for v in ('logout', - 'oidc-login', - 'oidc-login-complete', - 'oidc-logout') + reverse(v) + for v in ("logout", "oidc-login", "oidc-login-complete", "oidc-logout") ] def __call__(self, request): - if (request.method != 'GET' - or request.user.is_authenticated - or BACKEND_SESSION_KEY not in request.session - or 'OIDC' not in request.session[BACKEND_SESSION_KEY] - or request.path in self.exempted_urls): + if ( + request.method != "GET" + or request.user.is_authenticated + or BACKEND_SESSION_KEY not in request.session + or "OIDC" not in request.session[BACKEND_SESSION_KEY] + or request.path in self.exempted_urls + ): return self.get_response(request) # At that point, we know that a OIDC user was previously logged in. @@ -37,7 +38,7 @@ # redirected to logout page and a link will be offered to login again. # See implementation of "oidc-login-complete" view for more details. next_path = request.get_full_path() - redirect_url = reverse('oidc-login', - query_params={'next_path': next_path, - 'prompt': 'none'}) + redirect_url = reverse( + "oidc-login", query_params={"next_path": next_path, "prompt": "none"} + ) return HttpResponseRedirect(redirect_url) diff --git a/swh/web/auth/models.py b/swh/web/auth/models.py --- a/swh/web/auth/models.py +++ b/swh/web/auth/models.py @@ -19,7 +19,7 @@ """ # OIDC subject identifier - sub: str = '' + sub: str = "" # OIDC tokens and session related data, only relevant when a user # authenticates from a web browser @@ -32,7 +32,7 @@ session_state: Optional[str] = None class Meta: - app_label = 'swh.web.auth' + app_label = "swh.web.auth" proxy = True def save(self, **kwargs): diff --git a/swh/web/auth/utils.py b/swh/web/auth/utils.py --- a/swh/web/auth/utils.py +++ b/swh/web/auth/utils.py @@ -9,9 +9,7 @@ from base64 import urlsafe_b64encode from typing import Tuple -from swh.web.auth.keycloak import ( - KeycloakOpenIDConnect, get_keycloak_oidc_client -) +from swh.web.auth.keycloak import KeycloakOpenIDConnect, get_keycloak_oidc_client from swh.web.config import get_config @@ -34,18 +32,17 @@ # create the PKCE code challenge by hashing the code verifier with SHA256 # and encoding the result in URL-safe base64 (without padding) - code_challenge = hashlib.sha256(code_verifier_str.encode('ascii')).digest() - code_challenge_str = urlsafe_b64encode(code_challenge).decode('ascii') - code_challenge_str = code_challenge_str.replace('=', '') + code_challenge = hashlib.sha256(code_verifier_str.encode("ascii")).digest() + code_challenge_str = urlsafe_b64encode(code_challenge).decode("ascii") + code_challenge_str = code_challenge_str.replace("=", "") return code_verifier_str, code_challenge_str -OIDC_SWH_WEB_CLIENT_ID = 'swh-web' +OIDC_SWH_WEB_CLIENT_ID = "swh-web" -def get_oidc_client(client_id: str = OIDC_SWH_WEB_CLIENT_ID - ) -> KeycloakOpenIDConnect: +def get_oidc_client(client_id: str = OIDC_SWH_WEB_CLIENT_ID) -> KeycloakOpenIDConnect: """ Instantiate a KeycloakOpenIDConnect class for a given client in the SoftwareHeritage realm. 
@@ -57,6 +54,8 @@ An object to ease the interaction with the Keycloak server """ swhweb_config = get_config() - return get_keycloak_oidc_client(swhweb_config['keycloak']['server_url'], - swhweb_config['keycloak']['realm_name'], - client_id) + return get_keycloak_oidc_client( + swhweb_config["keycloak"]["server_url"], + swhweb_config["keycloak"]["realm_name"], + client_id, + ) diff --git a/swh/web/auth/views.py b/swh/web/auth/views.py --- a/swh/web/auth/views.py +++ b/swh/web/auth/views.py @@ -12,7 +12,9 @@ from django.contrib.auth import authenticate, login, logout from django.http import HttpRequest from django.http.response import ( - HttpResponse, HttpResponseRedirect, HttpResponseServerError + HttpResponse, + HttpResponseRedirect, + HttpResponseServerError, ) from swh.web.auth.models import OIDCUser @@ -27,30 +29,31 @@ """ # generate a CSRF token state = str(uuid.uuid4()) - redirect_uri = reverse('oidc-login-complete', request=request) + redirect_uri = reverse("oidc-login-complete", request=request) code_verifier, code_challenge = gen_oidc_pkce_codes() - request.session['login_data'] = { - 'code_verifier': code_verifier, - 'state': state, - 'redirect_uri': redirect_uri, - 'next_path': request.GET.get('next_path', ''), - 'prompt': request.GET.get('prompt', ''), + request.session["login_data"] = { + "code_verifier": code_verifier, + "state": state, + "redirect_uri": redirect_uri, + "next_path": request.GET.get("next_path", ""), + "prompt": request.GET.get("prompt", ""), } authorization_url_params = { - 'state': state, - 'code_challenge': code_challenge, - 'code_challenge_method': 'S256', - 'scope': 'openid', - 'prompt': request.GET.get('prompt', ''), + "state": state, + "code_challenge": code_challenge, + "code_challenge_method": "S256", + "scope": "openid", + "prompt": request.GET.get("prompt", ""), } try: oidc_client = get_oidc_client() authorization_url = oidc_client.authorization_url( - redirect_uri, **authorization_url_params) + redirect_uri, **authorization_url_params + ) return HttpResponseRedirect(authorization_url) except Exception as e: @@ -62,39 +65,41 @@ Django view to finalize login process using OpenID Connect. """ try: - if 'login_data' not in request.session: - raise Exception('Login process has not been initialized.') + if "login_data" not in request.session: + raise Exception("Login process has not been initialized.") - login_data = request.session['login_data'] - next_path = login_data['next_path'] or request.build_absolute_uri('/') + login_data = request.session["login_data"] + next_path = login_data["next_path"] or request.build_absolute_uri("/") - if 'error' in request.GET: - if login_data['prompt'] == 'none': + if "error" in request.GET: + if login_data["prompt"] == "none": # Silent login failed because OIDC session expired. # Redirect to logout page and inform user. 
logout(request) - logout_url = reverse('logout', - query_params={'next_path': next_path, - 'remote_user': 1}) + logout_url = reverse( + "logout", query_params={"next_path": next_path, "remote_user": 1} + ) return HttpResponseRedirect(logout_url) - return HttpResponseServerError(request.GET['error']) + return HttpResponseServerError(request.GET["error"]) - if 'code' not in request.GET or 'state' not in request.GET: - raise BadInputExc('Missing query parameters for authentication.') + if "code" not in request.GET or "state" not in request.GET: + raise BadInputExc("Missing query parameters for authentication.") # get CSRF token returned by OIDC server - state = request.GET['state'] + state = request.GET["state"] - if state != login_data['state']: - raise BadInputExc('Wrong CSRF token, aborting login process.') + if state != login_data["state"]: + raise BadInputExc("Wrong CSRF token, aborting login process.") - user = authenticate(request=request, - code=request.GET['code'], - code_verifier=login_data['code_verifier'], - redirect_uri=login_data['redirect_uri']) + user = authenticate( + request=request, + code=request.GET["code"], + code_verifier=login_data["code_verifier"], + redirect_uri=login_data["redirect_uri"], + ) if user is None: - raise Exception('User authentication failed.') + raise Exception("User authentication failed.") login(request, user) @@ -110,24 +115,23 @@ try: user = request.user logout(request) - if hasattr(user, 'refresh_token'): + if hasattr(user, "refresh_token"): oidc_client = get_oidc_client() user = cast(OIDCUser, user) refresh_token = cast(str, user.refresh_token) # end OpenID Connect session oidc_client.logout(refresh_token) # remove user data from cache - cache.delete(f'oidc_user_{user.id}') + cache.delete(f"oidc_user_{user.id}") - logout_url = reverse('logout', query_params={'remote_user': 1}) + logout_url = reverse("logout", query_params={"remote_user": 1}) return HttpResponseRedirect(request.build_absolute_uri(logout_url)) except Exception as e: return handle_view_exception(request, e) urlpatterns = [ - url(r'^oidc/login/$', oidc_login, name='oidc-login'), - url(r'^oidc/login-complete/$', oidc_login_complete, - name='oidc-login-complete'), - url(r'^oidc/logout/$', oidc_logout, name='oidc-logout'), + url(r"^oidc/login/$", oidc_login, name="oidc-login"), + url(r"^oidc/login-complete/$", oidc_login_complete, name="oidc-login-complete"), + url(r"^oidc/logout/$", oidc_logout, name="oidc-logout"), ] diff --git a/swh/web/browse/browseurls.py b/swh/web/browse/browseurls.py --- a/swh/web/browse/browseurls.py +++ b/swh/web/browse/browseurls.py @@ -11,7 +11,7 @@ Class to manage swh-web browse application urls. 
""" - scope = 'browse' + scope = "browse" def browse_route(*url_patterns, view_name=None, checksum_args=None): @@ -24,7 +24,7 @@ view_name: the name of the Django view associated to the routes used to reverse the url """ - url_patterns = ['^' + url_pattern + '$' for url_pattern in url_patterns] + url_patterns = ["^" + url_pattern + "$" for url_pattern in url_patterns] view_name = view_name def decorator(f): @@ -33,9 +33,9 @@ BrowseUrls.add_url_pattern(url_pattern, f, view_name) if checksum_args: - BrowseUrls.add_redirect_for_checksum_args(view_name, - url_patterns, - checksum_args) + BrowseUrls.add_redirect_for_checksum_args( + view_name, url_patterns, checksum_args + ) return f diff --git a/swh/web/browse/identifiers.py b/swh/web/browse/identifiers.py --- a/swh/web/browse/identifiers.py +++ b/swh/web/browse/identifiers.py @@ -17,9 +17,8 @@ The url that points to it is :http:get:`/(swh_id)/`. """ try: - swh_id_resolved = resolve_swh_persistent_id( - swh_id, query_params=request.GET) + swh_id_resolved = resolve_swh_persistent_id(swh_id, query_params=request.GET) except Exception as exc: return handle_view_exception(request, exc) - return redirect(swh_id_resolved['browse_url']) + return redirect(swh_id_resolved["browse_url"]) diff --git a/swh/web/browse/urls.py b/swh/web/browse/urls.py --- a/swh/web/browse/urls.py +++ b/swh/web/browse/urls.py @@ -6,12 +6,12 @@ from django.conf.urls import url from django.shortcuts import render, redirect -import swh.web.browse.views.directory # noqa -import swh.web.browse.views.content # noqa -import swh.web.browse.views.origin # noqa -import swh.web.browse.views.release # noqa -import swh.web.browse.views.revision # noqa -import swh.web.browse.views.snapshot # noqa +import swh.web.browse.views.directory # noqa +import swh.web.browse.views.content # noqa +import swh.web.browse.views.origin # noqa +import swh.web.browse.views.release # noqa +import swh.web.browse.views.revision # noqa +import swh.web.browse.views.snapshot # noqa from swh.web.browse.browseurls import BrowseUrls from swh.web.browse.identifiers import swh_id_browse @@ -19,33 +19,37 @@ def _browse_help_view(request): - return render(request, 'browse/help.html', - {'heading': 'How to browse the archive ?'}) + return render( + request, "browse/help.html", {"heading": "How to browse the archive ?"} + ) def _browse_search_view(request): - return render(request, 'browse/search.html', - {'heading': 'Search software origins to browse'}) + return render( + request, "browse/search.html", {"heading": "Search software origins to browse"} + ) def _browse_vault_view(request): - return render(request, 'browse/vault-ui.html', - {'heading': 'Download archive content from the Vault'}) + return render( + request, + "browse/vault-ui.html", + {"heading": "Download archive content from the Vault"}, + ) def _browse_origin_save_view(request): - return redirect(reverse('origin-save')) + return redirect(reverse("origin-save")) urlpatterns = [ - url(r'^$', _browse_search_view), - url(r'^help/$', _browse_help_view, name='browse-help'), - url(r'^search/$', _browse_search_view, name='browse-search'), - url(r'^vault/$', _browse_vault_view, name='browse-vault'), + url(r"^$", _browse_search_view), + url(r"^help/$", _browse_help_view, name="browse-help"), + url(r"^search/$", _browse_search_view, name="browse-search"), + url(r"^vault/$", _browse_vault_view, name="browse-vault"), # for backward compatibility - url(r'^origin/save/$', _browse_origin_save_view, - name='browse-origin-save'), - 
url(r'^(?Pswh:[0-9]+:[a-z]+:[0-9a-f]+.*)/$', swh_id_browse), + url(r"^origin/save/$", _browse_origin_save_view, name="browse-origin-save"), + url(r"^(?Pswh:[0-9]+:[a-z]+:[0-9a-f]+.*)/$", swh_id_browse), ] urlpatterns += BrowseUrls.get_url_patterns() diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -22,7 +22,10 @@ from swh.web.common.identifiers import get_swh_persistent_id from swh.web.common.origin_visits import get_origin_visit from swh.web.common.utils import ( - reverse, format_utc_iso_date, swh_object_icons, rst_to_html + reverse, + format_utc_iso_date, + swh_object_icons, + rst_to_html, ) from swh.web.config import get_config @@ -44,7 +47,7 @@ Raises: NotFoundExc if the directory is not found """ - cache_entry_id = 'directory_entries_%s' % sha1_git + cache_entry_id = "directory_entries_%s" % sha1_git cache_entry = cache.get(cache_entry_id) if cache_entry: @@ -52,17 +55,17 @@ entries = list(service.lookup_directory(sha1_git)) for e in entries: - e['perms'] = stat.filemode(e['perms']) - if e['type'] == 'rev': + e["perms"] = stat.filemode(e["perms"]) + if e["type"] == "rev": # modify dir entry name to explicitly show it points # to a revision - e['name'] = '%s @ %s' % (e['name'], e['target'][:7]) + e["name"] = "%s @ %s" % (e["name"], e["target"][:7]) - dirs = [e for e in entries if e['type'] in ('dir', 'rev')] - files = [e for e in entries if e['type'] == 'file'] + dirs = [e for e in entries if e["type"] in ("dir", "rev")] + files = [e for e in entries if e["type"] == "file"] - dirs = sorted(dirs, key=lambda d: d['name']) - files = sorted(files, key=lambda f: f['name']) + dirs = sorted(dirs, key=lambda d: d["name"]) + files = sorted(files, key=lambda f: f["name"]) cache.set(cache_entry_id, (dirs, files)) @@ -86,11 +89,11 @@ """ # https://pypi.org/project/python-magic/ # packaged as python3-magic in debian buster - if hasattr(magic, 'from_buffer'): + if hasattr(magic, "from_buffer"): m = magic.Magic(mime=True, mime_encoding=True) mime_encoding = m.from_buffer(content) - mime_type, encoding = mime_encoding.split(';') - encoding = encoding.replace(' charset=', '') + mime_type, encoding = mime_encoding.split(";") + encoding = encoding.replace(" charset=", "") # https://pypi.org/project/file-magic/ # packaged as python3-magic in debian stretch else: @@ -110,42 +113,44 @@ # maximum authorized content size in bytes for HTML display # with code highlighting -content_display_max_size = get_config()['content_display_max_size'] +content_display_max_size = get_config()["content_display_max_size"] -snapshot_content_max_size = get_config()['snapshot_content_max_size'] +snapshot_content_max_size = get_config()["snapshot_content_max_size"] def _re_encode_content(mimetype, encoding, content_data): # encode textual content to utf-8 if needed - if mimetype.startswith('text/'): + if mimetype.startswith("text/"): # probably a malformed UTF-8 content, re-encode it # by replacing invalid chars with a substitution one - if encoding == 'unknown-8bit': - content_data = content_data.decode('utf-8', 'replace')\ - .encode('utf-8') - elif encoding not in ['utf-8', 'binary']: - content_data = content_data.decode(encoding, 'replace')\ - .encode('utf-8') - elif mimetype.startswith('application/octet-stream'): + if encoding == "unknown-8bit": + content_data = content_data.decode("utf-8", "replace").encode("utf-8") + elif encoding not in ["utf-8", "binary"]: + content_data = content_data.decode(encoding, "replace").encode("utf-8") + elif 
mimetype.startswith("application/octet-stream"): # file may detect a text content as binary # so try to decode it for display - encodings = ['us-ascii', 'utf-8'] - encodings += ['iso-8859-%s' % i for i in range(1, 17)] + encodings = ["us-ascii", "utf-8"] + encodings += ["iso-8859-%s" % i for i in range(1, 17)] for enc in encodings: try: - content_data = content_data.decode(enc).encode('utf-8') + content_data = content_data.decode(enc).encode("utf-8") except Exception as exc: sentry_sdk.capture_exception(exc) else: # ensure display in content view encoding = enc - mimetype = 'text/plain' + mimetype = "text/plain" break return mimetype, encoding, content_data -def request_content(query_string, max_size=content_display_max_size, - raise_if_unavailable=True, re_encode=True): +def request_content( + query_string, + max_size=content_display_max_size, + raise_if_unavailable=True, + re_encode=True, +): """Function that retrieves a content from the archive. Raw bytes content is first retrieved, then the content mime type. @@ -179,22 +184,22 @@ license = service.lookup_content_license(query_string) except Exception as exc: sentry_sdk.capture_exception(exc) - mimetype = 'unknown' - encoding = 'unknown' + mimetype = "unknown" + encoding = "unknown" if filetype: - mimetype = filetype['mimetype'] - encoding = filetype['encoding'] + mimetype = filetype["mimetype"] + encoding = filetype["encoding"] # workaround when encountering corrupted data due to implicit # conversion from bytea to text in the indexer db (see T818) # TODO: Remove that code when all data have been correctly converted - if mimetype.startswith('\\'): + if mimetype.startswith("\\"): filetype = None - content_data['error_code'] = 200 - content_data['error_message'] = '' - content_data['error_description'] = '' + content_data["error_code"] = 200 + content_data["error_message"] = "" + content_data["error_description"] = "" - if not max_size or content_data['length'] < max_size: + if not max_size or content_data["length"] < max_size: try: content_raw = service.lookup_content_raw(query_string) except Exception as exc: @@ -202,46 +207,58 @@ raise exc else: sentry_sdk.capture_exception(exc) - content_data['raw_data'] = None - content_data['error_code'] = 404 - content_data['error_description'] = \ - 'The bytes of the content are currently not available in the archive.' # noqa - content_data['error_message'] = \ - http_status_code_message[content_data['error_code']] + content_data["raw_data"] = None + content_data["error_code"] = 404 + content_data["error_description"] = ( + "The bytes of the content are currently not available " + "in the archive." 
+ ) + content_data["error_message"] = http_status_code_message[ + content_data["error_code"] + ] else: - content_data['raw_data'] = content_raw['data'] + content_data["raw_data"] = content_raw["data"] if not filetype: - mimetype, encoding = \ - get_mimetype_and_encoding_for_content(content_data['raw_data']) # noqa + mimetype, encoding = get_mimetype_and_encoding_for_content( + content_data["raw_data"] + ) if re_encode: mimetype, encoding, raw_data = _re_encode_content( - mimetype, encoding, content_data['raw_data']) - content_data['raw_data'] = raw_data + mimetype, encoding, content_data["raw_data"] + ) + content_data["raw_data"] = raw_data else: - content_data['raw_data'] = None + content_data["raw_data"] = None - content_data['mimetype'] = mimetype - content_data['encoding'] = encoding + content_data["mimetype"] = mimetype + content_data["encoding"] = encoding if language: - content_data['language'] = language['lang'] + content_data["language"] = language["lang"] else: - content_data['language'] = 'not detected' + content_data["language"] = "not detected" if license: - content_data['licenses'] = ', '.join(license['facts'][0]['licenses']) + content_data["licenses"] = ", ".join(license["facts"][0]["licenses"]) else: - content_data['licenses'] = 'not detected' + content_data["licenses"] = "not detected" return content_data -_browsers_supported_image_mimes = set(['image/gif', 'image/png', - 'image/jpeg', 'image/bmp', - 'image/webp', 'image/svg', - 'image/svg+xml']) +_browsers_supported_image_mimes = set( + [ + "image/gif", + "image/png", + "image/jpeg", + "image/bmp", + "image/webp", + "image/svg", + "image/svg+xml", + ] +) def prepare_content_for_display(content_data, mime_type, path): @@ -273,25 +290,23 @@ language = highlightjs.get_hljs_language_from_mime_type(mime_type) if not language: - language = 'nohighlight' - elif mime_type.startswith('application/'): - mime_type = mime_type.replace('application/', 'text/') + language = "nohighlight" + elif mime_type.startswith("application/"): + mime_type = mime_type.replace("application/", "text/") - if mime_type.startswith('image/'): + if mime_type.startswith("image/"): if mime_type in _browsers_supported_image_mimes: - content_data = base64.b64encode(content_data).decode('ascii') + content_data = base64.b64encode(content_data).decode("ascii") else: content_data = None - if mime_type.startswith('image/svg'): - mime_type = 'image/svg+xml' + if mime_type.startswith("image/svg"): + mime_type = "image/svg+xml" - if mime_type.startswith('text/'): - content_data = content_data.decode('utf-8', errors='replace') + if mime_type.startswith("text/"): + content_data = content_data.decode("utf-8", errors="replace") - return {'content_data': content_data, - 'language': language, - 'mimetype': mime_type} + return {"content_data": content_data, "language": language, "mimetype": mime_type} def process_snapshot_branches(snapshot): @@ -310,7 +325,7 @@ targeting revisions and second member the sorted list of branches targeting releases """ - snapshot_branches = snapshot['branches'] + snapshot_branches = snapshot["branches"] branches = {} branch_aliases = {} releases = {} @@ -321,50 +336,48 @@ if not target: # FIXME: display branches with an unknown target anyway continue - target_id = target['target'] - target_type = target['target_type'] - if target_type == 'revision': + target_id = target["target"] + target_type = target["target_type"] + if target_type == "revision": branches[branch_name] = { - 'name': branch_name, - 'revision': target_id, + "name": branch_name, 
+ "revision": target_id, } revision_to_branch[target_id].add(branch_name) - elif target_type == 'release': + elif target_type == "release": release_to_branch[target_id].add(branch_name) - elif target_type == 'alias': + elif target_type == "alias": branch_aliases[branch_name] = target_id # FIXME: handle pointers to other object types def _enrich_release_branch(branch, release): releases[branch] = { - 'name': release['name'], - 'branch_name': branch, - 'date': format_utc_iso_date(release['date']), - 'id': release['id'], - 'message': release['message'], - 'target_type': release['target_type'], - 'target': release['target'], + "name": release["name"], + "branch_name": branch, + "date": format_utc_iso_date(release["date"]), + "id": release["id"], + "message": release["message"], + "target_type": release["target_type"], + "target": release["target"], } def _enrich_revision_branch(branch, revision): - branches[branch].update({ - 'revision': revision['id'], - 'directory': revision['directory'], - 'date': format_utc_iso_date(revision['date']), - 'message': revision['message'] - }) - - releases_info = service.lookup_release_multiple( - release_to_branch.keys() - ) + branches[branch].update( + { + "revision": revision["id"], + "directory": revision["directory"], + "date": format_utc_iso_date(revision["date"]), + "message": revision["message"], + } + ) + + releases_info = service.lookup_release_multiple(release_to_branch.keys()) for release in releases_info: - branches_to_update = release_to_branch[release['id']] + branches_to_update = release_to_branch[release["id"]] for branch in branches_to_update: _enrich_release_branch(branch, release) - if release['target_type'] == 'revision': - revision_to_release[release['target']].update( - branches_to_update - ) + if release["target_type"] == "revision": + revision_to_release[release["target"]].update(branches_to_update) revisions = service.lookup_revision_multiple( set(revision_to_branch.keys()) | set(revision_to_release.keys()) @@ -373,38 +386,38 @@ for revision in revisions: if not revision: continue - for branch in revision_to_branch[revision['id']]: + for branch in revision_to_branch[revision["id"]]: _enrich_revision_branch(branch, revision) - for release in revision_to_release[revision['id']]: - releases[release]['directory'] = revision['directory'] + for release in revision_to_release[revision["id"]]: + releases[release]["directory"] = revision["directory"] for branch_alias, branch_target in branch_aliases.items(): if branch_target in branches: branches[branch_alias] = dict(branches[branch_target]) else: - snp = service.lookup_snapshot(snapshot['id'], - branches_from=branch_target, - branches_count=1) - if snp and branch_target in snp['branches']: + snp = service.lookup_snapshot( + snapshot["id"], branches_from=branch_target, branches_count=1 + ) + if snp and branch_target in snp["branches"]: - if snp['branches'][branch_target] is None: + if snp["branches"][branch_target] is None: continue - target_type = snp['branches'][branch_target]['target_type'] - target = snp['branches'][branch_target]['target'] - if target_type == 'revision': - branches[branch_alias] = snp['branches'][branch_target] + target_type = snp["branches"][branch_target]["target_type"] + target = snp["branches"][branch_target]["target"] + if target_type == "revision": + branches[branch_alias] = snp["branches"][branch_target] revision = service.lookup_revision(target) _enrich_revision_branch(branch_alias, revision) - elif target_type == 'release': + elif target_type == "release": release 
= service.lookup_release(target) _enrich_release_branch(branch_alias, release) if branch_alias in branches: - branches[branch_alias]['name'] = branch_alias + branches[branch_alias]["name"] = branch_alias - ret_branches = list(sorted(branches.values(), key=lambda b: b['name'])) - ret_releases = list(sorted(releases.values(), key=lambda b: b['name'])) + ret_branches = list(sorted(branches.values(), key=lambda b: b["name"])) + ret_releases = list(sorted(releases.values(), key=lambda b: b["name"])) return ret_branches, ret_releases @@ -430,30 +443,29 @@ Raises: NotFoundExc if the snapshot does not exist """ - cache_entry_id = 'swh_snapshot_%s' % snapshot_id + cache_entry_id = "swh_snapshot_%s" % snapshot_id cache_entry = cache.get(cache_entry_id) if cache_entry: - return cache_entry['branches'], cache_entry['releases'] + return cache_entry["branches"], cache_entry["releases"] branches = [] releases = [] if snapshot_id: snapshot = service.lookup_snapshot( - snapshot_id, branches_count=snapshot_content_max_size) + snapshot_id, branches_count=snapshot_content_max_size + ) branches, releases = process_snapshot_branches(snapshot) - cache.set(cache_entry_id, { - 'branches': branches, - 'releases': releases, - }) + cache.set(cache_entry_id, {"branches": branches, "releases": releases,}) return branches, releases -def get_origin_visit_snapshot(origin_info, visit_ts=None, visit_id=None, - snapshot_id=None): +def get_origin_visit_snapshot( + origin_info, visit_ts=None, visit_id=None, snapshot_id=None +): """Returns the lists of branches and releases associated to a swh origin for a given visit. The visit is expressed by a timestamp. In the latter case, @@ -485,7 +497,7 @@ visit_info = get_origin_visit(origin_info, visit_ts, visit_id, snapshot_id) - return get_snapshot_content(visit_info['snapshot']) + return get_snapshot_content(visit_info["snapshot"]) def gen_link(url, link_text=None, link_attrs=None): @@ -504,30 +516,27 @@ An HTML link in the form 'link_text' """ - attrs = ' ' + attrs = " " if link_attrs: for k, v in link_attrs.items(): attrs += '%s="%s" ' % (k, v) if not link_text: link_text = url - link = '%s' \ - % (attrs, escape(url), escape(link_text)) + link = '%s' % (attrs, escape(url), escape(link_text)) return mark_safe(link) def _snapshot_context_query_params(snapshot_context): query_params = None - if snapshot_context and snapshot_context['origin_info']: - origin_info = snapshot_context['origin_info'] - query_params = {'origin': origin_info['url']} - if 'timestamp' in snapshot_context['url_args']: - query_params['timestamp'] = \ - snapshot_context['url_args']['timestamp'] - if 'visit_id' in snapshot_context['query_params']: - query_params['visit_id'] = \ - snapshot_context['query_params']['visit_id'] + if snapshot_context and snapshot_context["origin_info"]: + origin_info = snapshot_context["origin_info"] + query_params = {"origin": origin_info["url"]} + if "timestamp" in snapshot_context["url_args"]: + query_params["timestamp"] = snapshot_context["url_args"]["timestamp"] + if "visit_id" in snapshot_context["query_params"]: + query_params["visit_id"] = snapshot_context["query_params"]["visit_id"] elif snapshot_context: - query_params = {'snapshot_id': snapshot_context['snapshot_id']} + query_params = {"snapshot_id": snapshot_context["snapshot_id"]} return query_params @@ -546,15 +555,18 @@ """ query_params = _snapshot_context_query_params(snapshot_context) - return reverse('browse-revision', - url_args={'sha1_git': revision_id}, - query_params=query_params) + return reverse( + 
"browse-revision", url_args={"sha1_git": revision_id}, query_params=query_params + ) -def gen_revision_link(revision_id, shorten_id=False, snapshot_context=None, - link_text='Browse', - link_attrs={'class': 'btn btn-default btn-sm', - 'role': 'button'}): +def gen_revision_link( + revision_id, + shorten_id=False, + snapshot_context=None, + link_text="Browse", + link_attrs={"class": "btn btn-default btn-sm", "role": "button"}, +): """ Utility function for generating a link to a revision HTML view to insert in Django templates. @@ -587,9 +599,12 @@ return gen_link(revision_url, link_text, link_attrs) -def gen_directory_link(sha1_git, snapshot_context=None, link_text='Browse', - link_attrs={'class': 'btn btn-default btn-sm', - 'role': 'button'}): +def gen_directory_link( + sha1_git, + snapshot_context=None, + link_text="Browse", + link_attrs={"class": "btn btn-default btn-sm", "role": "button"}, +): """ Utility function for generating a link to a directory HTML view to insert in Django templates. @@ -610,18 +625,21 @@ query_params = _snapshot_context_query_params(snapshot_context) - directory_url = reverse('browse-directory', - url_args={'sha1_git': sha1_git}, - query_params=query_params) + directory_url = reverse( + "browse-directory", url_args={"sha1_git": sha1_git}, query_params=query_params + ) if not link_text: link_text = sha1_git return gen_link(directory_url, link_text, link_attrs) -def gen_snapshot_link(snapshot_id, snapshot_context=None, link_text='Browse', - link_attrs={'class': 'btn btn-default btn-sm', - 'role': 'button'}): +def gen_snapshot_link( + snapshot_id, + snapshot_context=None, + link_text="Browse", + link_attrs={"class": "btn btn-default btn-sm", "role": "button"}, +): """ Utility function for generating a link to a snapshot HTML view to insert in Django templates. @@ -640,17 +658,22 @@ query_params = _snapshot_context_query_params(snapshot_context) - snapshot_url = reverse('browse-snapshot', - url_args={'snapshot_id': snapshot_id}, - query_params=query_params) + snapshot_url = reverse( + "browse-snapshot", + url_args={"snapshot_id": snapshot_id}, + query_params=query_params, + ) if not link_text: link_text = snapshot_id return gen_link(snapshot_url, link_text, link_attrs) -def gen_content_link(sha1_git, snapshot_context=None, link_text='Browse', - link_attrs={'class': 'btn btn-default btn-sm', - 'role': 'button'}): +def gen_content_link( + sha1_git, + snapshot_context=None, + link_text="Browse", + link_attrs={"class": "btn btn-default btn-sm", "role": "button"}, +): """ Utility function for generating a link to a content HTML view to insert in Django templates. 
@@ -671,9 +694,11 @@ query_params = _snapshot_context_query_params(snapshot_context) - content_url = reverse('browse-content', - url_args={'query_string': 'sha1_git:' + sha1_git}, - query_params=query_params) + content_url = reverse( + "browse-content", + url_args={"query_string": "sha1_git:" + sha1_git}, + query_params=query_params, + ) if not link_text: link_text = sha1_git return gen_link(content_url, link_text, link_attrs) @@ -691,34 +716,35 @@ Returns: The revision log view URL """ - query_params = {'revision': revision_id} - if snapshot_context and snapshot_context['origin_info']: - origin_info = snapshot_context['origin_info'] - url_args = {'origin_url': origin_info['url']} - if 'timestamp' in snapshot_context['url_args']: - url_args['timestamp'] = \ - snapshot_context['url_args']['timestamp'] - if 'visit_id' in snapshot_context['query_params']: - query_params['visit_id'] = \ - snapshot_context['query_params']['visit_id'] - revision_log_url = reverse('browse-origin-log', - url_args=url_args, - query_params=query_params) + query_params = {"revision": revision_id} + if snapshot_context and snapshot_context["origin_info"]: + origin_info = snapshot_context["origin_info"] + url_args = {"origin_url": origin_info["url"]} + if "timestamp" in snapshot_context["url_args"]: + url_args["timestamp"] = snapshot_context["url_args"]["timestamp"] + if "visit_id" in snapshot_context["query_params"]: + query_params["visit_id"] = snapshot_context["query_params"]["visit_id"] + revision_log_url = reverse( + "browse-origin-log", url_args=url_args, query_params=query_params + ) elif snapshot_context: - url_args = {'snapshot_id': snapshot_context['snapshot_id']} - revision_log_url = reverse('browse-snapshot-log', - url_args=url_args, - query_params=query_params) + url_args = {"snapshot_id": snapshot_context["snapshot_id"]} + revision_log_url = reverse( + "browse-snapshot-log", url_args=url_args, query_params=query_params + ) else: - revision_log_url = reverse('browse-revision-log', - url_args={'sha1_git': revision_id}) + revision_log_url = reverse( + "browse-revision-log", url_args={"sha1_git": revision_id} + ) return revision_log_url -def gen_revision_log_link(revision_id, snapshot_context=None, - link_text='Browse', - link_attrs={'class': 'btn btn-default btn-sm', - 'role': 'button'}): +def gen_revision_log_link( + revision_id, + snapshot_context=None, + link_text="Browse", + link_attrs={"class": "btn btn-default btn-sm", "role": "button"}, +): """ Utility function for generating a link to a revision log HTML view (possibly in the context of an origin) to insert in Django templates. 
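`get_revision_log_url` just above routes to one of three views depending on what context is available. Condensed into a pure function for readability (the `pick_log_route` name and tuple return are illustrative, and the timestamp/visit_id plumbing is omitted):

```python
def pick_log_route(revision_id, snapshot_context=None):
    # origin context -> per-origin log; bare snapshot -> per-snapshot log;
    # no context at all -> plain revision log (route names as in the diff)
    query_params = {"revision": revision_id}
    if snapshot_context and snapshot_context.get("origin_info"):
        url_args = {"origin_url": snapshot_context["origin_info"]["url"]}
        return "browse-origin-log", url_args, query_params
    if snapshot_context:
        url_args = {"snapshot_id": snapshot_context["snapshot_id"]}
        return "browse-snapshot-log", url_args, query_params
    return "browse-revision-log", {"sha1_git": revision_id}, {}
```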
@@ -761,22 +787,24 @@ str: A mail link to the person or the person name if no email is present in person data """ - person_name = person['name'] or person['fullname'] or 'None' + person_name = person["name"] or person["fullname"] or "None" if link_text is None: link_text = person_name - person_email = person['email'] if person['email'] else None - if person_email is None and '@' in person_name and ' ' not in person_name: + person_email = person["email"] if person["email"] else None + if person_email is None and "@" in person_name and " " not in person_name: person_email = person_name if person_email: - return gen_link(url='mailto:%s' % person_email, - link_text=link_text) + return gen_link(url="mailto:%s" % person_email, link_text=link_text) else: return person_name -def gen_release_link(sha1_git, snapshot_context=None, link_text='Browse', - link_attrs={'class': 'btn btn-default btn-sm', - 'role': 'button'}): +def gen_release_link( + sha1_git, + snapshot_context=None, + link_text="Browse", + link_attrs={"class": "btn btn-default btn-sm", "role": "button"}, +): """ Utility function for generating a link to a release HTML view to insert in Django templates. @@ -795,9 +823,9 @@ query_params = _snapshot_context_query_params(snapshot_context) - release_url = reverse('browse-release', - url_args={'sha1_git': sha1_git}, - query_params=query_params) + release_url = reverse( + "browse-release", url_args={"sha1_git": sha1_git}, query_params=query_params + ) if not link_text: link_text = sha1_git return gen_link(release_url, link_text, link_attrs) @@ -824,39 +852,42 @@ for i, rev in enumerate(revision_log): if i == per_page: break - author_name = 'None' - author_fullname = 'None' - committer_fullname = 'None' - if rev['author']: - author_name = gen_person_mail_link(rev['author']) - author_fullname = rev['author']['fullname'] - if rev['committer']: - committer_fullname = rev['committer']['fullname'] - author_date = format_utc_iso_date(rev['date']) - committer_date = format_utc_iso_date(rev['committer_date']) - - tooltip = 'revision %s\n' % rev['id'] - tooltip += 'author: %s\n' % author_fullname - tooltip += 'author date: %s\n' % author_date - tooltip += 'committer: %s\n' % committer_fullname - tooltip += 'committer date: %s\n\n' % committer_date - if rev['message']: - tooltip += textwrap.indent(rev['message'], ' '*4) - - revision_log_data.append({ - 'author': author_name, - 'id': rev['id'][:7], - 'message': rev['message'], - 'date': author_date, - 'commit_date': committer_date, - 'url': gen_revision_url(rev['id'], snapshot_context), - 'tooltip': tooltip - }) + author_name = "None" + author_fullname = "None" + committer_fullname = "None" + if rev["author"]: + author_name = gen_person_mail_link(rev["author"]) + author_fullname = rev["author"]["fullname"] + if rev["committer"]: + committer_fullname = rev["committer"]["fullname"] + author_date = format_utc_iso_date(rev["date"]) + committer_date = format_utc_iso_date(rev["committer_date"]) + + tooltip = "revision %s\n" % rev["id"] + tooltip += "author: %s\n" % author_fullname + tooltip += "author date: %s\n" % author_date + tooltip += "committer: %s\n" % committer_fullname + tooltip += "committer date: %s\n\n" % committer_date + if rev["message"]: + tooltip += textwrap.indent(rev["message"], " " * 4) + + revision_log_data.append( + { + "author": author_name, + "id": rev["id"][:7], + "message": rev["message"], + "date": author_date, + "commit_date": committer_date, + "url": gen_revision_url(rev["id"], snapshot_context), + "tooltip": tooltip, + } + ) return 
revision_log_data -def get_snapshot_context(snapshot_id=None, origin_url=None, - timestamp=None, visit_id=None): +def get_snapshot_context( + snapshot_id=None, origin_url=None, timestamp=None, visit_id=None +): """ Utility function to compute relevant information when navigating the archive in a snapshot context. The snapshot is either @@ -902,63 +933,59 @@ visit_url = None branches_url = None releases_url = None - swh_type = 'snapshot' + swh_type = "snapshot" if origin_url: - swh_type = 'origin' - origin_info = service.lookup_origin({'url': origin_url}) + swh_type = "origin" + origin_info = service.lookup_origin({"url": origin_url}) - visit_info = get_origin_visit(origin_info, timestamp, visit_id, - snapshot_id) - fmt_date = format_utc_iso_date(visit_info['date']) - visit_info['fmt_date'] = fmt_date - snapshot_id = visit_info['snapshot'] + visit_info = get_origin_visit(origin_info, timestamp, visit_id, snapshot_id) + fmt_date = format_utc_iso_date(visit_info["date"]) + visit_info["fmt_date"] = fmt_date + snapshot_id = visit_info["snapshot"] if not snapshot_id: - raise NotFoundExc('No snapshot associated to the visit of origin ' - '%s on %s' % (escape(origin_url), fmt_date)) + raise NotFoundExc( + "No snapshot associated to the visit of origin " + "%s on %s" % (escape(origin_url), fmt_date) + ) # provided timestamp is not necessarily equals to the one # of the retrieved visit, so get the exact one in order # use it in the urls generated below if timestamp: - timestamp = visit_info['date'] + timestamp = visit_info["date"] - branches, releases = \ - get_origin_visit_snapshot(origin_info, timestamp, visit_id, - snapshot_id) + branches, releases = get_origin_visit_snapshot( + origin_info, timestamp, visit_id, snapshot_id + ) - url_args = {'origin_url': origin_info['url']} + url_args = {"origin_url": origin_info["url"]} - query_params = {'visit_id': visit_id} + query_params = {"visit_id": visit_id} - browse_url = reverse('browse-origin-visits', - url_args=url_args) + browse_url = reverse("browse-origin-visits", url_args=url_args) if timestamp: - url_args['timestamp'] = format_utc_iso_date(timestamp, - '%Y-%m-%dT%H:%M:%S') - visit_url = reverse('browse-origin-directory', - url_args=url_args, - query_params=query_params) - visit_info['url'] = visit_url - - branches_url = reverse('browse-origin-branches', - url_args=url_args, - query_params=query_params) - - releases_url = reverse('browse-origin-releases', - url_args=url_args, - query_params=query_params) + url_args["timestamp"] = format_utc_iso_date(timestamp, "%Y-%m-%dT%H:%M:%S") + visit_url = reverse( + "browse-origin-directory", url_args=url_args, query_params=query_params + ) + visit_info["url"] = visit_url + + branches_url = reverse( + "browse-origin-branches", url_args=url_args, query_params=query_params + ) + + releases_url = reverse( + "browse-origin-releases", url_args=url_args, query_params=query_params + ) elif snapshot_id: branches, releases = get_snapshot_content(snapshot_id) - url_args = {'snapshot_id': snapshot_id} - browse_url = reverse('browse-snapshot', - url_args=url_args) - branches_url = reverse('browse-snapshot-branches', - url_args=url_args) + url_args = {"snapshot_id": snapshot_id} + browse_url = reverse("browse-snapshot", url_args=url_args) + branches_url = reverse("browse-snapshot-branches", url_args=url_args) - releases_url = reverse('browse-snapshot-releases', - url_args=url_args) + releases_url = reverse("browse-snapshot-releases", url_args=url_args) releases = list(reversed(releases)) @@ -966,25 +993,25 @@ is_empty = 
sum(snapshot_sizes.values()) == 0 - swh_snp_id = persistent_identifier('snapshot', snapshot_id) + swh_snp_id = persistent_identifier("snapshot", snapshot_id) return { - 'swh_type': swh_type, - 'swh_object_id': swh_snp_id, - 'snapshot_id': snapshot_id, - 'snapshot_sizes': snapshot_sizes, - 'is_empty': is_empty, - 'origin_info': origin_info, - 'visit_info': visit_info, - 'branches': branches, - 'releases': releases, - 'branch': None, - 'release': None, - 'browse_url': browse_url, - 'branches_url': branches_url, - 'releases_url': releases_url, - 'url_args': url_args, - 'query_params': query_params + "swh_type": swh_type, + "swh_object_id": swh_snp_id, + "snapshot_id": snapshot_id, + "snapshot_sizes": snapshot_sizes, + "is_empty": is_empty, + "origin_info": origin_info, + "visit_info": visit_info, + "branches": branches, + "releases": releases, + "branch": None, + "release": None, + "browse_url": browse_url, + "branches_url": branches_url, + "releases_url": releases_url, + "url_args": url_args, + "query_params": query_params, } @@ -995,7 +1022,7 @@ "readme.md", "readme.rst", "readme.txt", - "readme" + "readme", ] @@ -1016,32 +1043,35 @@ readme_sha1 = None readme_html = None - lc_readmes = {k.lower(): {'orig_name': k, 'sha1': v} - for k, v in readmes.items()} + lc_readmes = {k.lower(): {"orig_name": k, "sha1": v} for k, v in readmes.items()} # look for readme names according to the preference order # defined by the _common_readme_names list for common_readme_name in _common_readme_names: if common_readme_name in lc_readmes: - readme_name = lc_readmes[common_readme_name]['orig_name'] - readme_sha1 = lc_readmes[common_readme_name]['sha1'] - readme_url = reverse('browse-content-raw', - url_args={'query_string': readme_sha1}, - query_params={'re_encode': 'true'}) + readme_name = lc_readmes[common_readme_name]["orig_name"] + readme_sha1 = lc_readmes[common_readme_name]["sha1"] + readme_url = reverse( + "browse-content-raw", + url_args={"query_string": readme_sha1}, + query_params={"re_encode": "true"}, + ) break # otherwise pick the first readme like file if any if not readme_name and len(readmes.items()) > 0: readme_name = next(iter(readmes)) readme_sha1 = readmes[readme_name] - readme_url = reverse('browse-content-raw', - url_args={'query_string': readme_sha1}, - query_params={'re_encode': 'true'}) + readme_url = reverse( + "browse-content-raw", + url_args={"query_string": readme_sha1}, + query_params={"re_encode": "true"}, + ) # convert rst README to html server side as there is # no viable solution to perform that task client side - if readme_name and readme_name.endswith('.rst'): - cache_entry_id = 'readme_%s' % readme_sha1 + if readme_name and readme_name.endswith(".rst"): + cache_entry_id = "readme_%s" % readme_sha1 cache_entry = cache.get(cache_entry_id) if cache_entry: @@ -1049,11 +1079,11 @@ else: try: rst_doc = request_content(readme_sha1) - readme_html = rst_to_html(rst_doc['raw_data']) + readme_html = rst_to_html(rst_doc["raw_data"]) cache.set(cache_entry_id, readme_html) except Exception as exc: sentry_sdk.capture_exception(exc) - readme_html = 'Readme bytes are not available' + readme_html = "Readme bytes are not available" return readme_name, readme_url, readme_html @@ -1085,21 +1115,23 @@ """ swh_ids = [] for swh_object in swh_objects: - if not swh_object['id']: + if not swh_object["id"]: continue - swh_id = get_swh_persistent_id(swh_object['type'], swh_object['id']) - show_options = swh_object['type'] == 'content' or \ - (snapshot_context and snapshot_context['origin_info'] is not 
None) - - object_icon = swh_object_icons[swh_object['type']] - - swh_ids.append({ - 'object_type': swh_object['type'], - 'object_id': swh_object['id'], - 'object_icon': object_icon, - 'swh_id': swh_id, - 'swh_id_url': reverse('browse-swh-id', - url_args={'swh_id': swh_id}), - 'show_options': show_options - }) + swh_id = get_swh_persistent_id(swh_object["type"], swh_object["id"]) + show_options = swh_object["type"] == "content" or ( + snapshot_context and snapshot_context["origin_info"] is not None + ) + + object_icon = swh_object_icons[swh_object["type"]] + + swh_ids.append( + { + "object_type": swh_object["type"], + "object_id": swh_object["id"], + "object_icon": object_icon, + "swh_id": swh_id, + "swh_id_url": reverse("browse-swh-id", url_args={"swh_id": swh_id}), + "show_options": show_options, + } + ) return swh_ids diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -16,21 +16,25 @@ from swh.model.hashutil import hash_to_hex from swh.web.common import query, service, highlightjs -from swh.web.common.utils import ( - reverse, gen_path_info, swh_object_icons -) +from swh.web.common.utils import reverse, gen_path_info, swh_object_icons from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.utils import ( - request_content, prepare_content_for_display, - content_display_max_size, get_snapshot_context, - get_swh_persistent_ids, gen_link, gen_directory_link + request_content, + prepare_content_for_display, + content_display_max_size, + get_snapshot_context, + get_swh_persistent_ids, + gen_link, + gen_directory_link, ) from swh.web.browse.browseurls import browse_route -@browse_route(r'content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/raw/', - view_name='browse-content-raw', - checksum_args=['query_string']) +@browse_route( + r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/raw/", + view_name="browse-content-raw", + checksum_args=["query_string"], +) def content_raw(request, query_string): """Django view that produces a raw display of a content identified by its hash value.
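The query_string captured by this route is an algo_hash:hash pair (e.g. sha1_git:<hex>) that query.parse_hash splits into an algorithm name and checksum bytes. A minimal usage sketch of the raw endpoint, assuming the public archive.softwareheritage.org deployment; the checksum below is only an illustrative placeholder:

import requests

# Hypothetical content hash; any archived sha1_git would do here.
url = (
    "https://archive.softwareheritage.org/browse/content/"
    "sha1_git:94a9ed024d3859793618152ea559a168bbcbb5e2/raw/"
)
# re_encode is parsed server side with strtobool, so "true"/"false" work.
response = requests.get(url, params={"re_encode": "true"}, timeout=30)
print(response.headers.get("Content-disposition"))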
@@ -39,35 +43,38 @@ :http:get:`/browse/content/[(algo_hash):](hash)/raw/` """ try: - re_encode = bool(strtobool(request.GET.get('re_encode', 'false'))) + re_encode = bool(strtobool(request.GET.get("re_encode", "false"))) algo, checksum = query.parse_hash(query_string) checksum = hash_to_hex(checksum) - content_data = request_content(query_string, max_size=None, - re_encode=re_encode) + content_data = request_content(query_string, max_size=None, re_encode=re_encode) except Exception as exc: return handle_view_exception(request, exc) - filename = request.GET.get('filename', None) + filename = request.GET.get("filename", None) if not filename: - filename = '%s_%s' % (algo, checksum) - - if content_data['mimetype'].startswith('text/') or \ - content_data['mimetype'] == 'inode/x-empty': - response = HttpResponse(content_data['raw_data'], - content_type="text/plain") - response['Content-disposition'] = 'filename=%s' % filename + filename = "%s_%s" % (algo, checksum) + + if ( + content_data["mimetype"].startswith("text/") + or content_data["mimetype"] == "inode/x-empty" + ): + response = HttpResponse(content_data["raw_data"], content_type="text/plain") + response["Content-disposition"] = "filename=%s" % filename else: - response = HttpResponse(content_data['raw_data'], - content_type='application/octet-stream') - response['Content-disposition'] = 'attachment; filename=%s' % filename + response = HttpResponse( + content_data["raw_data"], content_type="application/octet-stream" + ) + response["Content-disposition"] = "attachment; filename=%s" % filename return response _auto_diff_size_limit = 20000 -@browse_route(r'content/(?P<from_query_string>.*)/diff/(?P<to_query_string>.*)', # noqa - view_name='diff-contents') +@browse_route( + r"content/(?P<from_query_string>.*)/diff/(?P<to_query_string>.*)", + view_name="diff-contents", +) def _contents_diff(request, from_query_string, to_query_string): """ Browse endpoint used to compute unified diffs between two contents.
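In the hunk that follows, _contents_diff joins the output of difflib.unified_diff and drops its first two lines, which are the --- / +++ file headers rather than diff content. A self-contained sketch of that stdlib behavior (the sample lines are made up):

import difflib

a = ["first line\n", "second line\n"]
b = ["first line\n", "2nd line\n"]

diff = list(difflib.unified_diff(a, b))
# diff[0] and diff[1] are the "--- " / "+++ " file headers; the view keeps
# only the hunk text, exactly what "".join(list(diff_lines)[2:]) does below.
print("".join(diff[2:]))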
@@ -97,78 +104,83 @@ content_to_size = 0 content_from_lines = [] content_to_lines = [] - force = request.GET.get('force', 'false') - path = request.GET.get('path', None) - language = 'nohighlight' + force = request.GET.get("force", "false") + path = request.GET.get("path", None) + language = "nohighlight" force = bool(strtobool(force)) if from_query_string == to_query_string: - diff_str = 'File renamed without changes' + diff_str = "File renamed without changes" else: try: text_diff = True if from_query_string: - content_from = \ - request_content(from_query_string, max_size=None) - content_from_display_data = \ - prepare_content_for_display(content_from['raw_data'], - content_from['mimetype'], path) - language = content_from_display_data['language'] - content_from_size = content_from['length'] - if not (content_from['mimetype'].startswith('text/') or - content_from['mimetype'] == 'inode/x-empty'): + content_from = request_content(from_query_string, max_size=None) + content_from_display_data = prepare_content_for_display( + content_from["raw_data"], content_from["mimetype"], path + ) + language = content_from_display_data["language"] + content_from_size = content_from["length"] + if not ( + content_from["mimetype"].startswith("text/") + or content_from["mimetype"] == "inode/x-empty" + ): text_diff = False if text_diff and to_query_string: content_to = request_content(to_query_string, max_size=None) content_to_display_data = prepare_content_for_display( - content_to['raw_data'], content_to['mimetype'], path) - language = content_to_display_data['language'] - content_to_size = content_to['length'] - if not (content_to['mimetype'].startswith('text/') or - content_to['mimetype'] == 'inode/x-empty'): + content_to["raw_data"], content_to["mimetype"], path + ) + language = content_to_display_data["language"] + content_to_size = content_to["length"] + if not ( + content_to["mimetype"].startswith("text/") + or content_to["mimetype"] == "inode/x-empty" + ): text_diff = False diff_size = abs(content_to_size - content_from_size) if not text_diff: - diff_str = 'Diffs are not generated for non textual content' - language = 'nohighlight' + diff_str = "Diffs are not generated for non textual content" + language = "nohighlight" elif not force and diff_size > _auto_diff_size_limit: - diff_str = 'Large diffs are not automatically computed' - language = 'nohighlight' + diff_str = "Large diffs are not automatically computed" + language = "nohighlight" else: if content_from: - content_from_lines = \ - content_from['raw_data'].decode('utf-8')\ - .splitlines(True) - if content_from_lines and \ - content_from_lines[-1][-1] != '\n': - content_from_lines[-1] += '[swh-no-nl-marker]\n' + content_from_lines = ( + content_from["raw_data"].decode("utf-8").splitlines(True) + ) + if content_from_lines and content_from_lines[-1][-1] != "\n": + content_from_lines[-1] += "[swh-no-nl-marker]\n" if content_to: - content_to_lines = content_to['raw_data'].decode('utf-8')\ - .splitlines(True) - if content_to_lines and content_to_lines[-1][-1] != '\n': - content_to_lines[-1] += '[swh-no-nl-marker]\n' - - diff_lines = difflib.unified_diff(content_from_lines, - content_to_lines) - diff_str = ''.join(list(diff_lines)[2:]) + content_to_lines = ( + content_to["raw_data"].decode("utf-8").splitlines(True) + ) + if content_to_lines and content_to_lines[-1][-1] != "\n": + content_to_lines[-1] += "[swh-no-nl-marker]\n" + + diff_lines = difflib.unified_diff(content_from_lines, content_to_lines) + diff_str = "".join(list(diff_lines)[2:]) 
except Exception as exc: sentry_sdk.capture_exception(exc) diff_str = str(exc) - diff_data['diff_str'] = diff_str - diff_data['language'] = language - diff_data_json = json.dumps(diff_data, separators=(',', ': ')) - return HttpResponse(diff_data_json, content_type='application/json') + diff_data["diff_str"] = diff_str + diff_data["language"] = language + diff_data_json = json.dumps(diff_data, separators=(",", ": ")) + return HttpResponse(diff_data_json, content_type="application/json") -@browse_route(r'content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/', - view_name='browse-content', - checksum_args=['query_string']) +@browse_route( + r"content/(?P<query_string>[0-9a-z_:]*[0-9a-f]+.)/", + view_name="browse-content", + checksum_args=["query_string"], +) def content_display(request, query_string): """Django view that produces an HTML display of a content identified by its hash value. @@ -179,46 +191,48 @@ try: algo, checksum = query.parse_hash(query_string) checksum = hash_to_hex(checksum) - content_data = request_content(query_string, - raise_if_unavailable=False) - origin_url = request.GET.get('origin_url', None) - selected_language = request.GET.get('language', None) + content_data = request_content(query_string, raise_if_unavailable=False) + origin_url = request.GET.get("origin_url", None) + selected_language = request.GET.get("language", None) if not origin_url: - origin_url = request.GET.get('origin', None) + origin_url = request.GET.get("origin", None) snapshot_context = None if origin_url: try: snapshot_context = get_snapshot_context(origin_url=origin_url) except NotFoundExc: - raw_cnt_url = reverse('browse-content', - url_args={'query_string': query_string}) - error_message = \ - ('The Software Heritage archive has a content ' - 'with the hash you provided but the origin ' - 'mentioned in your request appears broken: %s. ' - 'Please check the URL and try again.\n\n' - 'Nevertheless, you can still browse the content ' - 'without origin information: %s' - % (gen_link(origin_url), gen_link(raw_cnt_url))) + raw_cnt_url = reverse( + "browse-content", url_args={"query_string": query_string} + ) + error_message = ( + "The Software Heritage archive has a content " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. 
" + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the content " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_cnt_url)) + ) raise NotFoundExc(error_message) if snapshot_context: - snapshot_context['visit_info'] = None + snapshot_context["visit_info"] = None except Exception as exc: return handle_view_exception(request, exc) - path = request.GET.get('path', None) + path = request.GET.get("path", None) content = None language = None mimetype = None - if content_data['raw_data'] is not None: + if content_data["raw_data"] is not None: content_display_data = prepare_content_for_display( - content_data['raw_data'], content_data['mimetype'], path) - content = content_display_data['content_data'] - language = content_display_data['language'] - mimetype = content_display_data['mimetype'] + content_data["raw_data"], content_data["mimetype"], path + ) + content = content_display_data["content_data"] + language = content_display_data["language"] + mimetype = content_display_data["mimetype"] # Override language with user-selected language if selected_language is not None: @@ -226,7 +240,7 @@ available_languages = None - if mimetype and 'text/' in mimetype: + if mimetype and "text/" in mimetype: available_languages = highlightjs.get_supported_languages() root_dir = None @@ -235,37 +249,37 @@ directory_id = None directory_url = None - query_params = {'origin': origin_url} + query_params = {"origin": origin_url} breadcrumbs = [] if path: - split_path = path.split('/') + split_path = path.split("/") root_dir = split_path[0] filename = split_path[-1] if root_dir != path: - path = path.replace(root_dir + '/', '') - path = path[:-len(filename)] + path = path.replace(root_dir + "/", "") + path = path[: -len(filename)] path_info = gen_path_info(path) - dir_url = reverse('browse-directory', - url_args={'sha1_git': root_dir}, - query_params=query_params) - breadcrumbs.append({'name': root_dir[:7], - 'url': dir_url}) + dir_url = reverse( + "browse-directory", + url_args={"sha1_git": root_dir}, + query_params=query_params, + ) + breadcrumbs.append({"name": root_dir[:7], "url": dir_url}) for pi in path_info: - dir_url = reverse('browse-directory', - url_args={'sha1_git': root_dir, - 'path': pi['path']}, - query_params=query_params) - breadcrumbs.append({'name': pi['name'], - 'url': dir_url}) - breadcrumbs.append({'name': filename, - 'url': None}) + dir_url = reverse( + "browse-directory", + url_args={"sha1_git": root_dir, "path": pi["path"]}, + query_params=query_params, + ) + breadcrumbs.append({"name": pi["name"], "url": dir_url}) + breadcrumbs.append({"name": filename, "url": None}) if path and root_dir != path: try: dir_info = service.lookup_directory_with_path(root_dir, path) - directory_id = dir_info['target'] + directory_id = dir_info["target"] except Exception as exc: return handle_view_exception(request, exc) elif root_dir != path: @@ -274,61 +288,67 @@ if directory_id: directory_url = gen_directory_link(directory_id) - query_params = {'filename': filename} + query_params = {"filename": filename} - content_raw_url = reverse('browse-content-raw', - url_args={'query_string': query_string}, - query_params=query_params) + content_raw_url = reverse( + "browse-content-raw", + url_args={"query_string": query_string}, + query_params=query_params, + ) content_metadata = { - 'sha1': content_data['checksums']['sha1'], - 'sha1_git': content_data['checksums']['sha1_git'], - 'sha256': content_data['checksums']['sha256'], - 'blake2s256': 
content_data['checksums']['blake2s256'], - 'mimetype': content_data['mimetype'], - 'encoding': content_data['encoding'], - 'size': filesizeformat(content_data['length']), - 'language': content_data['language'], - 'licenses': content_data['licenses'], - 'filename': filename, - 'directory': directory_id, - 'context-independent directory': directory_url + "sha1": content_data["checksums"]["sha1"], + "sha1_git": content_data["checksums"]["sha1_git"], + "sha256": content_data["checksums"]["sha256"], + "blake2s256": content_data["checksums"]["blake2s256"], + "mimetype": content_data["mimetype"], + "encoding": content_data["encoding"], + "size": filesizeformat(content_data["length"]), + "language": content_data["language"], + "licenses": content_data["licenses"], + "filename": filename, + "directory": directory_id, + "context-independent directory": directory_url, } if filename: - content_metadata['filename'] = filename + content_metadata["filename"] = filename - sha1_git = content_data['checksums']['sha1_git'] - swh_ids = get_swh_persistent_ids([{'type': 'content', - 'id': sha1_git}]) + sha1_git = content_data["checksums"]["sha1_git"] + swh_ids = get_swh_persistent_ids([{"type": "content", "id": sha1_git}]) - heading = 'Content - %s' % sha1_git + heading = "Content - %s" % sha1_git if breadcrumbs: - content_path = '/'.join([bc['name'] for bc in breadcrumbs]) - heading += ' - %s' % content_path - - return render(request, 'browse/content.html', - {'heading': heading, - 'swh_object_id': swh_ids[0]['swh_id'], - 'swh_object_name': 'Content', - 'swh_object_metadata': content_metadata, - 'content': content, - 'content_size': content_data['length'], - 'max_content_size': content_display_max_size, - 'mimetype': mimetype, - 'language': language, - 'available_languages': available_languages, - 'breadcrumbs': breadcrumbs, - 'top_right_link': { - 'url': content_raw_url, - 'icon': swh_object_icons['content'], - 'text': 'Raw File' - }, - 'snapshot_context': snapshot_context, - 'vault_cooking': None, - 'show_actions_menu': True, - 'swh_ids': swh_ids, - 'error_code': content_data['error_code'], - 'error_message': content_data['error_message'], - 'error_description': content_data['error_description']}, - status=content_data['error_code']) + content_path = "/".join([bc["name"] for bc in breadcrumbs]) + heading += " - %s" % content_path + + return render( + request, + "browse/content.html", + { + "heading": heading, + "swh_object_id": swh_ids[0]["swh_id"], + "swh_object_name": "Content", + "swh_object_metadata": content_metadata, + "content": content, + "content_size": content_data["length"], + "max_content_size": content_display_max_size, + "mimetype": mimetype, + "language": language, + "available_languages": available_languages, + "breadcrumbs": breadcrumbs, + "top_right_link": { + "url": content_raw_url, + "icon": swh_object_icons["content"], + "text": "Raw File", + }, + "snapshot_context": snapshot_context, + "vault_cooking": None, + "show_actions_menu": True, + "swh_ids": swh_ids, + "error_code": content_data["error_code"], + "error_message": content_data["error_message"], + "error_description": content_data["error_description"], + }, + status=content_data["error_code"], + ) diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -11,23 +11,25 @@ import sentry_sdk from swh.web.common import service -from swh.web.common.utils import ( - reverse, gen_path_info -) +from swh.web.common.utils import 
reverse, gen_path_info from swh.web.common.exc import handle_view_exception, NotFoundExc from swh.web.browse.utils import ( - get_directory_entries, get_snapshot_context, - get_readme_to_display, get_swh_persistent_ids, - gen_link + get_directory_entries, + get_snapshot_context, + get_readme_to_display, + get_swh_persistent_ids, + gen_link, ) from swh.web.browse.browseurls import browse_route -@browse_route(r'directory/(?P<sha1_git>[0-9a-f]+)/', - r'directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/', - view_name='browse-directory', - checksum_args=['sha1_git']) +@browse_route( + r"directory/(?P<sha1_git>[0-9a-f]+)/", + r"directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/", + view_name="browse-directory", + checksum_args=["sha1_git"], +) def directory_browse(request, sha1_git, path=None): """Django view for browsing the content of a directory identified by its sha1_git value. @@ -39,127 +41,156 @@ try: if path: dir_info = service.lookup_directory_with_path(sha1_git, path) - sha1_git = dir_info['target'] + sha1_git = dir_info["target"] dirs, files = get_directory_entries(sha1_git) - origin_url = request.GET.get('origin_url', None) + origin_url = request.GET.get("origin_url", None) if not origin_url: - origin_url = request.GET.get('origin', None) + origin_url = request.GET.get("origin", None) snapshot_context = None if origin_url: try: snapshot_context = get_snapshot_context(origin_url=origin_url) except NotFoundExc: - raw_dir_url = reverse('browse-directory', - url_args={'sha1_git': sha1_git}) - error_message = \ - ('The Software Heritage archive has a directory ' - 'with the hash you provided but the origin ' - 'mentioned in your request appears broken: %s. ' - 'Please check the URL and try again.\n\n' - 'Nevertheless, you can still browse the directory ' - 'without origin information: %s' - % (gen_link(origin_url), gen_link(raw_dir_url))) + raw_dir_url = reverse( + "browse-directory", url_args={"sha1_git": sha1_git} + ) + error_message = ( + "The Software Heritage archive has a directory " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. 
" + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the directory " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_dir_url)) + ) raise NotFoundExc(error_message) if snapshot_context: - snapshot_context['visit_info'] = None + snapshot_context["visit_info"] = None except Exception as exc: return handle_view_exception(request, exc) path_info = gen_path_info(path) - query_params = {'origin': origin_url} + query_params = {"origin": origin_url} breadcrumbs = [] - breadcrumbs.append({'name': root_sha1_git[:7], - 'url': reverse('browse-directory', - url_args={'sha1_git': root_sha1_git}, - query_params=query_params)}) + breadcrumbs.append( + { + "name": root_sha1_git[:7], + "url": reverse( + "browse-directory", + url_args={"sha1_git": root_sha1_git}, + query_params=query_params, + ), + } + ) for pi in path_info: - breadcrumbs.append({'name': pi['name'], - 'url': reverse('browse-directory', - url_args={'sha1_git': root_sha1_git, - 'path': pi['path']}, - query_params=query_params)}) - - path = '' if path is None else (path + '/') + breadcrumbs.append( + { + "name": pi["name"], + "url": reverse( + "browse-directory", + url_args={"sha1_git": root_sha1_git, "path": pi["path"]}, + query_params=query_params, + ), + } + ) + + path = "" if path is None else (path + "/") for d in dirs: - if d['type'] == 'rev': - d['url'] = reverse('browse-revision', - url_args={'sha1_git': d['target']}, - query_params=query_params) + if d["type"] == "rev": + d["url"] = reverse( + "browse-revision", + url_args={"sha1_git": d["target"]}, + query_params=query_params, + ) else: - d['url'] = reverse('browse-directory', - url_args={'sha1_git': root_sha1_git, - 'path': path + d['name']}, - query_params=query_params) + d["url"] = reverse( + "browse-directory", + url_args={"sha1_git": root_sha1_git, "path": path + d["name"]}, + query_params=query_params, + ) sum_file_sizes = 0 readmes = {} for f in files: - query_string = 'sha1_git:' + f['target'] - f['url'] = reverse('browse-content', - url_args={'query_string': query_string}, - query_params={'path': root_sha1_git + '/' + - path + f['name'], - 'origin': origin_url}) - if f['length'] is not None: - sum_file_sizes += f['length'] - f['length'] = filesizeformat(f['length']) - if f['name'].lower().startswith('readme'): - readmes[f['name']] = f['checksums']['sha1'] + query_string = "sha1_git:" + f["target"] + f["url"] = reverse( + "browse-content", + url_args={"query_string": query_string}, + query_params={ + "path": root_sha1_git + "/" + path + f["name"], + "origin": origin_url, + }, + ) + if f["length"] is not None: + sum_file_sizes += f["length"] + f["length"] = filesizeformat(f["length"]) + if f["name"].lower().startswith("readme"): + readmes[f["name"]] = f["checksums"]["sha1"] readme_name, readme_url, readme_html = get_readme_to_display(readmes) sum_file_sizes = filesizeformat(sum_file_sizes) - dir_metadata = {"directory": sha1_git, - "number of regular files": len(files), - "number of subdirectories": len(dirs), - "sum of regular file sizes": sum_file_sizes} + dir_metadata = { + "directory": sha1_git, + "number of regular files": len(files), + "number of subdirectories": len(dirs), + "sum of regular file sizes": sum_file_sizes, + } vault_cooking = { - 'directory_context': True, - 'directory_id': sha1_git, - 'revision_context': False, - 'revision_id': None + "directory_context": True, + "directory_id": sha1_git, + "revision_context": False, + "revision_id": None, } - swh_objects = [{'type': 'directory', 'id': sha1_git}] + 
swh_objects = [{"type": "directory", "id": sha1_git}] swh_ids = get_swh_persistent_ids( - swh_objects=swh_objects, snapshot_context=snapshot_context) + swh_objects=swh_objects, snapshot_context=snapshot_context + ) - heading = 'Directory - %s' % sha1_git + heading = "Directory - %s" % sha1_git if breadcrumbs: - dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/' - heading += ' - %s' % dir_path - - return render(request, 'browse/directory.html', - {'heading': heading, - 'swh_object_id': swh_ids[0]['swh_id'], - 'swh_object_name': 'Directory', - 'swh_object_metadata': dir_metadata, - 'dirs': dirs, - 'files': files, - 'breadcrumbs': breadcrumbs, - 'top_right_link': None, - 'readme_name': readme_name, - 'readme_url': readme_url, - 'readme_html': readme_html, - 'snapshot_context': snapshot_context, - 'vault_cooking': vault_cooking, - 'show_actions_menu': True, - 'swh_ids': swh_ids}) - - -@browse_route(r'directory/resolve/content-path/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/', # noqa - view_name='browse-directory-resolve-content-path', - checksum_args=['sha1_git']) + dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/" + heading += " - %s" % dir_path + + return render( + request, + "browse/directory.html", + { + "heading": heading, + "swh_object_id": swh_ids[0]["swh_id"], + "swh_object_name": "Directory", + "swh_object_metadata": dir_metadata, + "dirs": dirs, + "files": files, + "breadcrumbs": breadcrumbs, + "top_right_link": None, + "readme_name": readme_name, + "readme_url": readme_url, + "readme_html": readme_html, + "snapshot_context": snapshot_context, + "vault_cooking": vault_cooking, + "show_actions_menu": True, + "swh_ids": swh_ids, + }, + ) + + +@browse_route( + r"directory/resolve/content-path/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/", + view_name="browse-directory-resolve-content-path", + checksum_args=["sha1_git"], +) def _directory_resolve_content_path(request, sha1_git, path): """ Internal endpoint redirecting to data url for a specific file path @@ -167,12 +198,13 @@ """ try: path = os.path.normpath(path) - if not path.startswith('../'): + if not path.startswith("../"): dir_info = service.lookup_directory_with_path(sha1_git, path) - if dir_info['type'] == 'file': - sha1 = dir_info['checksums']['sha1'] - data_url = reverse('browse-content-raw', - url_args={'query_string': sha1}) + if dir_info["type"] == "file": + sha1 = dir_info["checksums"]["sha1"] + data_url = reverse( + "browse-content-raw", url_args={"query_string": sha1} + ) return redirect(data_url) except Exception as exc: sentry_sdk.capture_exception(exc) diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -7,28 +7,28 @@ from swh.web.common import service from swh.web.common.origin_visits import get_origin_visits -from swh.web.common.utils import ( - reverse, format_utc_iso_date, parse_timestamp -) +from swh.web.common.utils import reverse, format_utc_iso_date, parse_timestamp from swh.web.common.exc import handle_view_exception from swh.web.browse.utils import get_snapshot_context from swh.web.browse.browseurls import browse_route from .utils.snapshot_context import ( - browse_snapshot_directory, browse_snapshot_content, - browse_snapshot_log, browse_snapshot_branches, - browse_snapshot_releases + browse_snapshot_directory, + browse_snapshot_content, + browse_snapshot_log, + browse_snapshot_branches, + browse_snapshot_releases, ) -@browse_route(r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/directory/', - r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)' - '/directory/(?P<path>.+)/', - 
r'origin/(?P<origin_url>.+)/directory/', - r'origin/(?P<origin_url>.+)/directory/(?P<path>.+)/', - view_name='browse-origin-directory') -def origin_directory_browse(request, origin_url, - timestamp=None, path=None): +@browse_route( + r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/directory/", + r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)" "/directory/(?P<path>.+)/", + r"origin/(?P<origin_url>.+)/directory/", + r"origin/(?P<origin_url>.+)/directory/(?P<path>.+)/", + view_name="browse-origin-directory", +) +def origin_directory_browse(request, origin_url, timestamp=None, path=None): """Django view for browsing the content of a directory associated to an origin for a given visit. @@ -36,17 +36,18 @@ * :http:get:`/browse/origin/(origin_url)/directory/[(path)/]` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/directory/[(path)/]` - """ # noqa - return browse_snapshot_directory(request, origin_url=origin_url, - timestamp=timestamp, path=path) + """ + return browse_snapshot_directory( + request, origin_url=origin_url, timestamp=timestamp, path=path + ) -@browse_route(r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)' - '/content/(?P<path>.+)/', - r'origin/(?P<origin_url>.+)/content/(?P<path>.+)/', - view_name='browse-origin-content') -def origin_content_browse(request, origin_url, path=None, - timestamp=None): +@browse_route( + r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)" "/content/(?P<path>.+)/", + r"origin/(?P<origin_url>.+)/content/(?P<path>.+)/", + view_name="browse-origin-content", +) +def origin_content_browse(request, origin_url, path=None, timestamp=None): """Django view that produces an HTML display of a content associated to an origin for a given visit. @@ -55,19 +56,25 @@ * :http:get:`/browse/origin/(origin_url)/content/(path)/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/content/(path)/` - """ # noqa - language = request.GET.get('language', None) - return browse_snapshot_content(request, - origin_url=origin_url, timestamp=timestamp, - path=path, selected_language=language) + """ + language = request.GET.get("language", None) + return browse_snapshot_content( + request, + origin_url=origin_url, + timestamp=timestamp, + path=path, + selected_language=language, + ) PER_PAGE = 20 -@browse_route(r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/log/', - r'origin/(?P<origin_url>.+)/log/', - view_name='browse-origin-log') +@browse_route( + r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/log/", + r"origin/(?P<origin_url>.+)/log/", + view_name="browse-origin-log", +) def origin_log_browse(request, origin_url, timestamp=None): """Django view that produces an HTML display of revisions history (aka the commit log) associated to a software origin. @@ -76,14 +83,15 @@ * :http:get:`/browse/origin/(origin_url)/log/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/log/` - """ # noqa - return browse_snapshot_log(request, - origin_url=origin_url, timestamp=timestamp) + """ + return browse_snapshot_log(request, origin_url=origin_url, timestamp=timestamp) -@browse_route(r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/branches/', - r'origin/(?P<origin_url>.+)/branches/', - view_name='browse-origin-branches') +@browse_route( + r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/branches/", + r"origin/(?P<origin_url>.+)/branches/", + view_name="browse-origin-branches", +) def origin_branches_browse(request, origin_url, timestamp=None): """Django view that produces an HTML display of the list of branches associated to an origin for a given visit.
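These origin routes all delegate to the shared browse_snapshot_* helpers, and the rest of the diff links back to them through swh-web's own reverse wrapper, which, unlike Django's plain reverse, also accepts query_params. A hedged sketch of how such a URL is built, assuming a configured swh-web/Django environment; the origin URL and resulting path are illustrative:

from swh.web.common.utils import reverse

# Build a browse URL for the commit log of a (hypothetical) origin.
url = reverse(
    "browse-origin-log",
    url_args={"origin_url": "https://github.com/python/cpython"},
    query_params={"per_page": 20},
)
# Expected shape, e.g.: /browse/origin/https://github.com/python/cpython/log/?per_page=20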
@@ -93,14 +101,15 @@ * :http:get:`/browse/origin/(origin_url)/branches/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/branches/` - """ # noqa - return browse_snapshot_branches(request, - origin_url=origin_url, timestamp=timestamp) + """ + return browse_snapshot_branches(request, origin_url=origin_url, timestamp=timestamp) -@browse_route(r'origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/releases/', - r'origin/(?P<origin_url>.+)/releases/', - view_name='browse-origin-releases') +@browse_route( + r"origin/(?P<origin_url>.+)/visit/(?P<timestamp>.+)/releases/", + r"origin/(?P<origin_url>.+)/releases/", + view_name="browse-origin-releases", +) def origin_releases_browse(request, origin_url, timestamp=None): """Django view that produces an HTML display of the list of releases associated to an origin for a given visit. @@ -110,13 +119,11 @@ * :http:get:`/browse/origin/(origin_url)/releases/` * :http:get:`/browse/origin/(origin_url)/visit/(timestamp)/releases/` - """ # noqa - return browse_snapshot_releases(request, - origin_url=origin_url, timestamp=timestamp) + """ + return browse_snapshot_releases(request, origin_url=origin_url, timestamp=timestamp) -@browse_route(r'origin/(?P<origin_url>.+)/visits/', - view_name='browse-origin-visits') +@browse_route(r"origin/(?P<origin_url>.+)/visits/", view_name="browse-origin-visits") def origin_visits_browse(request, origin_url): """Django view that produces an HTML display of visits reporting for a given origin. @@ -125,52 +132,58 @@ :http:get:`/browse/origin/(origin_url)/visits/`. """ try: - origin_info = service.lookup_origin({'url': origin_url}) + origin_info = service.lookup_origin({"url": origin_url}) origin_visits = get_origin_visits(origin_info) snapshot_context = get_snapshot_context(origin_url=origin_url) except Exception as exc: return handle_view_exception(request, exc) for i, visit in enumerate(origin_visits): - url_date = format_utc_iso_date(visit['date'], '%Y-%m-%dT%H:%M:%SZ') - visit['fmt_date'] = format_utc_iso_date(visit['date']) + url_date = format_utc_iso_date(visit["date"], "%Y-%m-%dT%H:%M:%SZ") + visit["fmt_date"] = format_utc_iso_date(visit["date"]) query_params = {} if i < len(origin_visits) - 1: - if visit['date'] == origin_visits[i+1]['date']: - query_params = {'visit_id': visit['visit']} + if visit["date"] == origin_visits[i + 1]["date"]: + query_params = {"visit_id": visit["visit"]} if i > 0: - if visit['date'] == origin_visits[i-1]['date']: - query_params = {'visit_id': visit['visit']} + if visit["date"] == origin_visits[i - 1]["date"]: + query_params = {"visit_id": visit["visit"]} - snapshot = visit['snapshot'] if visit['snapshot'] else '' + snapshot = visit["snapshot"] if visit["snapshot"] else "" - visit['browse_url'] = reverse('browse-origin-directory', - url_args={'origin_url': origin_url, - 'timestamp': url_date}, - query_params=query_params) + visit["browse_url"] = reverse( + "browse-origin-directory", + url_args={"origin_url": origin_url, "timestamp": url_date}, + query_params=query_params, + ) if not snapshot: - visit['snapshot'] = '' - visit['date'] = parse_timestamp(visit['date']).timestamp() - - heading = 'Origin visits - %s' % origin_url - - return render(request, 'browse/origin-visits.html', - {'heading': heading, - 'swh_object_name': 'Visits', - 'swh_object_metadata': origin_info, - 'origin_visits': origin_visits, - 'origin_info': origin_info, - 'snapshot_context': snapshot_context, - 'vault_cooking': None, - 'show_actions_menu': False}) - - -@browse_route(r'origin/(?P<origin_url>.+)/', - view_name='browse-origin') + visit["snapshot"] = "" + visit["date"] = parse_timestamp(visit["date"]).timestamp() + 
+ heading = "Origin visits - %s" % origin_url + + return render( + request, + "browse/origin-visits.html", + { + "heading": heading, + "swh_object_name": "Visits", + "swh_object_metadata": origin_info, + "origin_visits": origin_visits, + "origin_info": origin_info, + "snapshot_context": snapshot_context, + "vault_cooking": None, + "show_actions_menu": False, + }, + ) + + +@browse_route(r"origin/(?P.+)/", view_name="browse-origin") def origin_browse(request, origin_url): """Django view that redirects to the display of the latest archived snapshot for a given software origin. """ - last_snapshot_url = reverse('browse-origin-directory', - url_args={'origin_url': origin_url}) + last_snapshot_url = reverse( + "browse-origin-directory", url_args={"origin_url": origin_url} + ) return redirect(last_snapshot_url) diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py --- a/swh/web/browse/views/release.py +++ b/swh/web/browse/views/release.py @@ -7,21 +7,27 @@ import sentry_sdk from swh.web.common import service -from swh.web.common.utils import ( - reverse, format_utc_iso_date -) +from swh.web.common.utils import reverse, format_utc_iso_date from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.browseurls import browse_route from swh.web.browse.utils import ( - gen_revision_link, get_snapshot_context, gen_link, - gen_snapshot_link, get_swh_persistent_ids, gen_directory_link, - gen_content_link, gen_release_link, gen_person_mail_link + gen_revision_link, + get_snapshot_context, + gen_link, + gen_snapshot_link, + get_swh_persistent_ids, + gen_directory_link, + gen_content_link, + gen_release_link, + gen_person_mail_link, ) -@browse_route(r'release/(?P[0-9a-f]+)/', - view_name='browse-release', - checksum_args=['sha1_git']) +@browse_route( + r"release/(?P[0-9a-f]+)/", + view_name="browse-release", + checksum_args=["sha1_git"], +) def release_browse(request, sha1_git): """ Django view that produces an HTML display of a release @@ -33,30 +39,31 @@ release = service.lookup_release(sha1_git) snapshot_context = None origin_info = None - snapshot_id = request.GET.get('snapshot_id', None) - origin_url = request.GET.get('origin_url', None) + snapshot_id = request.GET.get("snapshot_id", None) + origin_url = request.GET.get("origin_url", None) if not origin_url: - origin_url = request.GET.get('origin', None) - timestamp = request.GET.get('timestamp', None) - visit_id = request.GET.get('visit_id', None) + origin_url = request.GET.get("origin", None) + timestamp = request.GET.get("timestamp", None) + visit_id = request.GET.get("visit_id", None) if origin_url: try: snapshot_context = get_snapshot_context( - snapshot_id, origin_url, timestamp, visit_id) + snapshot_id, origin_url, timestamp, visit_id + ) except NotFoundExc: - raw_rel_url = reverse('browse-release', - url_args={'sha1_git': sha1_git}) - error_message = \ - ('The Software Heritage archive has a release ' - 'with the hash you provided but the origin ' - 'mentioned in your request appears broken: %s. ' - 'Please check the URL and try again.\n\n' - 'Nevertheless, you can still browse the release ' - 'without origin information: %s' - % (gen_link(origin_url), gen_link(raw_rel_url))) + raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git}) + error_message = ( + "The Software Heritage archive has a release " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. 
" + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the release " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_rel_url)) + ) raise NotFoundExc(error_message) - origin_info = snapshot_context['origin_info'] + origin_info = snapshot_context["origin_info"] elif snapshot_id: snapshot_context = get_snapshot_context(snapshot_id) except Exception as exc: @@ -64,147 +71,165 @@ release_data = {} - release_data['author'] = 'None' - if release['author']: - release_data['author'] = gen_person_mail_link(release['author']) - release_data['date'] = format_utc_iso_date(release['date']) - release_data['release'] = sha1_git - release_data['name'] = release['name'] - release_data['synthetic'] = release['synthetic'] - release_data['target'] = release['target'] - release_data['target type'] = release['target_type'] + release_data["author"] = "None" + if release["author"]: + release_data["author"] = gen_person_mail_link(release["author"]) + release_data["date"] = format_utc_iso_date(release["date"]) + release_data["release"] = sha1_git + release_data["name"] = release["name"] + release_data["synthetic"] = release["synthetic"] + release_data["target"] = release["target"] + release_data["target type"] = release["target_type"] if snapshot_context: - if release['target_type'] == 'revision': - release_data['context-independent target'] = \ - gen_revision_link(release['target']) - elif release['target_type'] == 'content': - release_data['context-independent target'] = \ - gen_content_link(release['target']) - elif release['target_type'] == 'directory': - release_data['context-independent target'] = \ - gen_directory_link(release['target']) - elif release['target_type'] == 'release': - release_data['context-independent target'] = \ - gen_release_link(release['target']) + if release["target_type"] == "revision": + release_data["context-independent target"] = gen_revision_link( + release["target"] + ) + elif release["target_type"] == "content": + release_data["context-independent target"] = gen_content_link( + release["target"] + ) + elif release["target_type"] == "directory": + release_data["context-independent target"] = gen_directory_link( + release["target"] + ) + elif release["target_type"] == "release": + release_data["context-independent target"] = gen_release_link( + release["target"] + ) release_note_lines = [] - if release['message']: - release_note_lines = release['message'].split('\n') + if release["message"]: + release_note_lines = release["message"].split("\n") vault_cooking = None rev_directory = None target_link = None - if release['target_type'] == 'revision': - target_link = gen_revision_link(release['target'], - snapshot_context=snapshot_context, - link_text=None, link_attrs=None) + if release["target_type"] == "revision": + target_link = gen_revision_link( + release["target"], + snapshot_context=snapshot_context, + link_text=None, + link_attrs=None, + ) try: - revision = service.lookup_revision(release['target']) - rev_directory = revision['directory'] + revision = service.lookup_revision(release["target"]) + rev_directory = revision["directory"] vault_cooking = { - 'directory_context': True, - 'directory_id': rev_directory, - 'revision_context': True, - 'revision_id': release['target'] + "directory_context": True, + "directory_id": rev_directory, + "revision_context": True, + "revision_id": release["target"], } except Exception as exc: sentry_sdk.capture_exception(exc) - elif release['target_type'] == 'directory': - target_link = 
gen_directory_link(release['target'], - snapshot_context=snapshot_context, - link_text=None, link_attrs=None) + elif release["target_type"] == "directory": + target_link = gen_directory_link( + release["target"], + snapshot_context=snapshot_context, + link_text=None, + link_attrs=None, + ) try: # check directory exists - service.lookup_directory(release['target']) + service.lookup_directory(release["target"]) vault_cooking = { - 'directory_context': True, - 'directory_id': release['target'], - 'revision_context': False, - 'revision_id': None + "directory_context": True, + "directory_id": release["target"], + "revision_context": False, + "revision_id": None, } except Exception as exc: sentry_sdk.capture_exception(exc) - elif release['target_type'] == 'content': - target_link = gen_content_link(release['target'], - snapshot_context=snapshot_context, - link_text=None, link_attrs=None) - elif release['target_type'] == 'release': - target_link = gen_release_link(release['target'], - snapshot_context=snapshot_context, - link_text=None, link_attrs=None) + elif release["target_type"] == "content": + target_link = gen_content_link( + release["target"], + snapshot_context=snapshot_context, + link_text=None, + link_attrs=None, + ) + elif release["target_type"] == "release": + target_link = gen_release_link( + release["target"], + snapshot_context=snapshot_context, + link_text=None, + link_attrs=None, + ) rev_directory_url = None if rev_directory is not None: if origin_info: rev_directory_url = reverse( - 'browse-origin-directory', - url_args={'origin_url': origin_info['url']}, - query_params={'release': release['name']}) + "browse-origin-directory", + url_args={"origin_url": origin_info["url"]}, + query_params={"release": release["name"]}, + ) elif snapshot_id: rev_directory_url = reverse( - 'browse-snapshot-directory', - url_args={'snapshot_id': snapshot_id}, - query_params={'release': release['name']}) + "browse-snapshot-directory", + url_args={"snapshot_id": snapshot_id}, + query_params={"release": release["name"]}, + ) else: rev_directory_url = reverse( - 'browse-directory', - url_args={'sha1_git': rev_directory}) + "browse-directory", url_args={"sha1_git": rev_directory} + ) directory_link = None if rev_directory_url is not None: directory_link = gen_link(rev_directory_url, rev_directory) - release['directory_link'] = directory_link - release['target_link'] = target_link + release["directory_link"] = directory_link + release["target_link"] = target_link if snapshot_context: - release_data['snapshot'] = snapshot_context['snapshot_id'] + release_data["snapshot"] = snapshot_context["snapshot_id"] if origin_info: - release_data['context-independent release'] = \ - gen_release_link(release['id']) - release_data['origin url'] = gen_link(origin_info['url'], - origin_info['url']) - browse_snapshot_link = \ - gen_snapshot_link(snapshot_context['snapshot_id']) - release_data['context-independent snapshot'] = browse_snapshot_link + release_data["context-independent release"] = gen_release_link(release["id"]) + release_data["origin url"] = gen_link(origin_info["url"], origin_info["url"]) + browse_snapshot_link = gen_snapshot_link(snapshot_context["snapshot_id"]) + release_data["context-independent snapshot"] = browse_snapshot_link - swh_objects = [{'type': 'release', - 'id': sha1_git}] + swh_objects = [{"type": "release", "id": sha1_git}] if snapshot_context: - snapshot_id = snapshot_context['snapshot_id'] + snapshot_id = snapshot_context["snapshot_id"] if snapshot_id: - swh_objects.append({'type': 
'snapshot', - 'id': snapshot_id}) + swh_objects.append({"type": "snapshot", "id": snapshot_id}) swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) - note_header = 'None' + note_header = "None" if len(release_note_lines) > 0: note_header = release_note_lines[0] - release['note_header'] = note_header - release['note_body'] = '\n'.join(release_note_lines[1:]) + release["note_header"] = note_header + release["note_body"] = "\n".join(release_note_lines[1:]) - heading = 'Release - %s' % release['name'] + heading = "Release - %s" % release["name"] if snapshot_context: - context_found = 'snapshot: %s' % snapshot_context['snapshot_id'] + context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: - context_found = 'origin: %s' % origin_info['url'] - heading += ' - %s' % context_found - - return render(request, 'browse/release.html', - {'heading': heading, - 'swh_object_id': swh_ids[0]['swh_id'], - 'swh_object_name': 'Release', - 'swh_object_metadata': release_data, - 'release': release, - 'snapshot_context': snapshot_context, - 'show_actions_menu': True, - 'breadcrumbs': None, - 'vault_cooking': vault_cooking, - 'top_right_link': None, - 'swh_ids': swh_ids}) + context_found = "origin: %s" % origin_info["url"] + heading += " - %s" % context_found + + return render( + request, + "browse/release.html", + { + "heading": heading, + "swh_object_id": swh_ids[0]["swh_id"], + "swh_object_name": "Release", + "swh_object_metadata": release_data, + "release": release, + "snapshot_context": snapshot_context, + "show_actions_menu": True, + "breadcrumbs": None, + "vault_cooking": vault_cooking, + "top_right_link": None, + "swh_ids": swh_ids, + }, + ) diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -16,33 +16,48 @@ from swh.model.identifiers import persistent_identifier from swh.web.common import service from swh.web.common.utils import ( - reverse, format_utc_iso_date, gen_path_info, swh_object_icons + reverse, + format_utc_iso_date, + gen_path_info, + swh_object_icons, ) from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.browseurls import browse_route from swh.web.browse.utils import ( - gen_link, gen_revision_link, gen_revision_url, - get_snapshot_context, get_revision_log_url, get_directory_entries, - gen_directory_link, request_content, prepare_content_for_display, - content_display_max_size, gen_snapshot_link, get_readme_to_display, - get_swh_persistent_ids, format_log_entries, gen_person_mail_link + gen_link, + gen_revision_link, + gen_revision_url, + get_snapshot_context, + get_revision_log_url, + get_directory_entries, + gen_directory_link, + request_content, + prepare_content_for_display, + content_display_max_size, + gen_snapshot_link, + get_readme_to_display, + get_swh_persistent_ids, + format_log_entries, + gen_person_mail_link, ) def _gen_content_url(revision, query_string, path, snapshot_context): if snapshot_context: - url_args = snapshot_context['url_args'] - url_args['path'] = path - query_params = snapshot_context['query_params'] - query_params['revision'] = revision['id'] - content_url = reverse('browse-origin-content', - url_args=url_args, - query_params=query_params) + url_args = snapshot_context["url_args"] + url_args["path"] = path + query_params = snapshot_context["query_params"] + query_params["revision"] = revision["id"] + content_url = reverse( + "browse-origin-content", url_args=url_args, 
query_params=query_params + ) else: - content_path = '%s/%s' % (revision['directory'], path) - content_url = reverse('browse-content', - url_args={'query_string': query_string}, - query_params={'path': content_path}) + content_path = "%s/%s" % (revision["directory"], path) + content_url = reverse( + "browse-content", + url_args={"query_string": query_string}, + query_params={"path": content_path}, + ) return content_url @@ -77,67 +92,72 @@ changes_msg = [] for i, change in enumerate(changes): hasher = hashlib.sha1() - from_query_string = '' - to_query_string = '' - diff_id = 'diff-' - if change['from']: - from_query_string = 'sha1_git:' + change['from']['target'] - diff_id += change['from']['target'] + '-' + change['from_path'] - diff_id += '-' - if change['to']: - to_query_string = 'sha1_git:' + change['to']['target'] - diff_id += change['to']['target'] + change['to_path'] - change['path'] = change['to_path'] or change['from_path'] - url_args = {'from_query_string': from_query_string, - 'to_query_string': to_query_string} - query_params = {'path': change['path']} - change['diff_url'] = reverse('diff-contents', - url_args=url_args, - query_params=query_params) - - hasher.update(diff_id.encode('utf-8')) + from_query_string = "" + to_query_string = "" + diff_id = "diff-" + if change["from"]: + from_query_string = "sha1_git:" + change["from"]["target"] + diff_id += change["from"]["target"] + "-" + change["from_path"] + diff_id += "-" + if change["to"]: + to_query_string = "sha1_git:" + change["to"]["target"] + diff_id += change["to"]["target"] + change["to_path"] + change["path"] = change["to_path"] or change["from_path"] + url_args = { + "from_query_string": from_query_string, + "to_query_string": to_query_string, + } + query_params = {"path": change["path"]} + change["diff_url"] = reverse( + "diff-contents", url_args=url_args, query_params=query_params + ) + + hasher.update(diff_id.encode("utf-8")) diff_id = hasher.hexdigest() - change['id'] = diff_id - panel_diff_link = '#panel_' + diff_id - - if change['type'] == 'modify': - change['content_url'] = \ - _gen_content_url(revision, to_query_string, - change['to_path'], snapshot_context) - changes_msg.append('modified: %s' % - _gen_diff_link(i, panel_diff_link, - change['to_path'])) - elif change['type'] == 'insert': - change['content_url'] = \ - _gen_content_url(revision, to_query_string, - change['to_path'], snapshot_context) - changes_msg.append('new file: %s' % - _gen_diff_link(i, panel_diff_link, - change['to_path'])) - elif change['type'] == 'delete': - parent = service.lookup_revision(revision['parents'][0]) - change['content_url'] = \ - _gen_content_url(parent, - from_query_string, - change['from_path'], snapshot_context) - changes_msg.append('deleted: %s' % - _gen_diff_link(i, panel_diff_link, - change['from_path'])) - elif change['type'] == 'rename': - change['content_url'] = \ - _gen_content_url(revision, to_query_string, - change['to_path'], snapshot_context) - link_text = change['from_path'] + ' → ' + change['to_path'] - changes_msg.append('renamed: %s' % - _gen_diff_link(i, panel_diff_link, link_text)) + change["id"] = diff_id + panel_diff_link = "#panel_" + diff_id + + if change["type"] == "modify": + change["content_url"] = _gen_content_url( + revision, to_query_string, change["to_path"], snapshot_context + ) + changes_msg.append( + "modified: %s" % _gen_diff_link(i, panel_diff_link, change["to_path"]) + ) + elif change["type"] == "insert": + change["content_url"] = _gen_content_url( + revision, to_query_string, 
change["to_path"], snapshot_context + ) + changes_msg.append( + "new file: %s" % _gen_diff_link(i, panel_diff_link, change["to_path"]) + ) + elif change["type"] == "delete": + parent = service.lookup_revision(revision["parents"][0]) + change["content_url"] = _gen_content_url( + parent, from_query_string, change["from_path"], snapshot_context + ) + changes_msg.append( + "deleted: %s" + % _gen_diff_link(i, panel_diff_link, change["from_path"]) + ) + elif change["type"] == "rename": + change["content_url"] = _gen_content_url( + revision, to_query_string, change["to_path"], snapshot_context + ) + link_text = change["from_path"] + " → " + change["to_path"] + changes_msg.append( + "renamed: %s" % _gen_diff_link(i, panel_diff_link, link_text) + ) if not changes: - changes_msg.append('No changes') - return mark_safe('\n'.join(changes_msg)) + changes_msg.append("No changes") + return mark_safe("\n".join(changes_msg)) -@browse_route(r'revision/(?P[0-9a-f]+)/diff/', - view_name='diff-revision', - checksum_args=['sha1_git']) +@browse_route( + r"revision/(?P[0-9a-f]+)/diff/", + view_name="diff-revision", + checksum_args=["sha1_git"], +) def _revision_diff(request, sha1_git): """ Browse internal endpoint to compute revision diff @@ -145,36 +165,38 @@ try: revision = service.lookup_revision(sha1_git) snapshot_context = None - origin_url = request.GET.get('origin_url', None) + origin_url = request.GET.get("origin_url", None) if not origin_url: - origin_url = request.GET.get('origin', None) - timestamp = request.GET.get('timestamp', None) - visit_id = request.GET.get('visit_id', None) + origin_url = request.GET.get("origin", None) + timestamp = request.GET.get("timestamp", None) + visit_id = request.GET.get("visit_id", None) if origin_url: snapshot_context = get_snapshot_context( - origin_url=origin_url, timestamp=timestamp, visit_id=visit_id) + origin_url=origin_url, timestamp=timestamp, visit_id=visit_id + ) except Exception as exc: return handle_view_exception(request, exc) changes = service.diff_revision(sha1_git) - changes_msg = _gen_revision_changes_list(revision, changes, - snapshot_context) + changes_msg = _gen_revision_changes_list(revision, changes, snapshot_context) diff_data = { - 'total_nb_changes': len(changes), - 'changes': changes[:_max_displayed_file_diffs], - 'changes_msg': changes_msg + "total_nb_changes": len(changes), + "changes": changes[:_max_displayed_file_diffs], + "changes_msg": changes_msg, } - diff_data_json = json.dumps(diff_data, separators=(',', ': ')) - return HttpResponse(diff_data_json, content_type='application/json') + diff_data_json = json.dumps(diff_data, separators=(",", ": ")) + return HttpResponse(diff_data_json, content_type="application/json") NB_LOG_ENTRIES = 100 -@browse_route(r'revision/(?P[0-9a-f]+)/log/', - view_name='browse-revision-log', - checksum_args=['sha1_git']) +@browse_route( + r"revision/(?P[0-9a-f]+)/log/", + view_name="browse-revision-log", + checksum_args=["sha1_git"], +) def revision_log_browse(request, sha1_git): """ Django view that produces an HTML display of the history @@ -183,79 +205,96 @@ The url that points to it is :http:get:`/browse/revision/(sha1_git)/log/` """ try: - per_page = int(request.GET.get('per_page', NB_LOG_ENTRIES)) - offset = int(request.GET.get('offset', 0)) - revs_ordering = request.GET.get('revs_ordering', 'committer_date') - session_key = 'rev_%s_log_ordering_%s' % (sha1_git, revs_ordering) + per_page = int(request.GET.get("per_page", NB_LOG_ENTRIES)) + offset = int(request.GET.get("offset", 0)) + revs_ordering = 
request.GET.get("revs_ordering", "committer_date") + session_key = "rev_%s_log_ordering_%s" % (sha1_git, revs_ordering) rev_log_session = request.session.get(session_key, None) rev_log = [] revs_walker_state = None if rev_log_session: - rev_log = rev_log_session['rev_log'] - revs_walker_state = rev_log_session['revs_walker_state'] - - if len(rev_log) < offset+per_page: - revs_walker = \ - service.get_revisions_walker(revs_ordering, sha1_git, - max_revs=offset+per_page+1, - state=revs_walker_state) - - rev_log += [rev['id'] for rev in revs_walker] + rev_log = rev_log_session["rev_log"] + revs_walker_state = rev_log_session["revs_walker_state"] + + if len(rev_log) < offset + per_page: + revs_walker = service.get_revisions_walker( + revs_ordering, + sha1_git, + max_revs=offset + per_page + 1, + state=revs_walker_state, + ) + + rev_log += [rev["id"] for rev in revs_walker] revs_walker_state = revs_walker.export_state() - revs = rev_log[offset:offset+per_page] + revs = rev_log[offset : offset + per_page] revision_log = service.lookup_revision_multiple(revs) request.session[session_key] = { - 'rev_log': rev_log, - 'revs_walker_state': revs_walker_state + "rev_log": rev_log, + "revs_walker_state": revs_walker_state, } except Exception as exc: return handle_view_exception(request, exc) - revs_ordering = request.GET.get('revs_ordering', '') + revs_ordering = request.GET.get("revs_ordering", "") prev_log_url = None if len(rev_log) > offset + per_page: - prev_log_url = reverse('browse-revision-log', - url_args={'sha1_git': sha1_git}, - query_params={'per_page': per_page, - 'offset': offset + per_page, - 'revs_ordering': revs_ordering}) + prev_log_url = reverse( + "browse-revision-log", + url_args={"sha1_git": sha1_git}, + query_params={ + "per_page": per_page, + "offset": offset + per_page, + "revs_ordering": revs_ordering, + }, + ) next_log_url = None if offset != 0: - next_log_url = reverse('browse-revision-log', - url_args={'sha1_git': sha1_git}, - query_params={'per_page': per_page, - 'offset': offset - per_page, - 'revs_ordering': revs_ordering}) + next_log_url = reverse( + "browse-revision-log", + url_args={"sha1_git": sha1_git}, + query_params={ + "per_page": per_page, + "offset": offset - per_page, + "revs_ordering": revs_ordering, + }, + ) revision_log_data = format_log_entries(revision_log, per_page) - swh_rev_id = persistent_identifier('revision', sha1_git) - - return render(request, 'browse/revision-log.html', - {'heading': 'Revision history', - 'swh_object_id': swh_rev_id, - 'swh_object_name': 'Revisions history', - 'swh_object_metadata': None, - 'revision_log': revision_log_data, - 'revs_ordering': revs_ordering, - 'next_log_url': next_log_url, - 'prev_log_url': prev_log_url, - 'breadcrumbs': None, - 'top_right_link': None, - 'snapshot_context': None, - 'vault_cooking': None, - 'show_actions_menu': True, - 'swh_ids': None}) - - -@browse_route(r'revision/(?P<sha1_git>[0-9a-f]+)/', - r'revision/(?P<sha1_git>[0-9a-f]+)/(?P<extra_path>.+)/', - view_name='browse-revision', - checksum_args=['sha1_git']) + swh_rev_id = persistent_identifier("revision", sha1_git) + + return render( + request, + "browse/revision-log.html", + { + "heading": "Revision history", + "swh_object_id": swh_rev_id, + "swh_object_name": "Revisions history", + "swh_object_metadata": None, + "revision_log": revision_log_data, + "revs_ordering": revs_ordering, + "next_log_url": next_log_url, + "prev_log_url": prev_log_url, + "breadcrumbs": None, + "top_right_link": None, + "snapshot_context": None, + "vault_cooking": None, + "show_actions_menu": True, + 
"swh_ids": None, + }, + ) + + +@browse_route( + r"revision/(?P[0-9a-f]+)/", + r"revision/(?P[0-9a-f]+)/(?P.+)/", + view_name="browse-revision", + checksum_args=["sha1_git"], +) def revision_browse(request, sha1_git, extra_path=None): """ Django view that produces an HTML display of a revision @@ -267,136 +306,149 @@ revision = service.lookup_revision(sha1_git) origin_info = None snapshot_context = None - origin_url = request.GET.get('origin_url', None) + origin_url = request.GET.get("origin_url", None) if not origin_url: - origin_url = request.GET.get('origin', None) - timestamp = request.GET.get('timestamp', None) - visit_id = request.GET.get('visit_id', None) - snapshot_id = request.GET.get('snapshot_id', None) - path = request.GET.get('path', None) + origin_url = request.GET.get("origin", None) + timestamp = request.GET.get("timestamp", None) + visit_id = request.GET.get("visit_id", None) + snapshot_id = request.GET.get("snapshot_id", None) + path = request.GET.get("path", None) dir_id = None dirs, files = None, None content_data = None if origin_url: try: snapshot_context = get_snapshot_context( - origin_url=origin_url, timestamp=timestamp, - visit_id=visit_id) + origin_url=origin_url, timestamp=timestamp, visit_id=visit_id + ) except NotFoundExc: - raw_rev_url = reverse('browse-revision', - url_args={'sha1_git': sha1_git}) - error_message = \ - ('The Software Heritage archive has a revision ' - 'with the hash you provided but the origin ' - 'mentioned in your request appears broken: %s. ' - 'Please check the URL and try again.\n\n' - 'Nevertheless, you can still browse the revision ' - 'without origin information: %s' - % (gen_link(origin_url), gen_link(raw_rev_url))) + raw_rev_url = reverse( + "browse-revision", url_args={"sha1_git": sha1_git} + ) + error_message = ( + "The Software Heritage archive has a revision " + "with the hash you provided but the origin " + "mentioned in your request appears broken: %s. 
" + "Please check the URL and try again.\n\n" + "Nevertheless, you can still browse the revision " + "without origin information: %s" + % (gen_link(origin_url), gen_link(raw_rev_url)) + ) raise NotFoundExc(error_message) - origin_info = snapshot_context['origin_info'] - snapshot_id = snapshot_context['snapshot_id'] + origin_info = snapshot_context["origin_info"] + snapshot_id = snapshot_context["snapshot_id"] elif snapshot_id: snapshot_context = get_snapshot_context(snapshot_id) if path: - file_info = \ - service.lookup_directory_with_path(revision['directory'], path) - if file_info['type'] == 'dir': - dir_id = file_info['target'] + file_info = service.lookup_directory_with_path(revision["directory"], path) + if file_info["type"] == "dir": + dir_id = file_info["target"] else: - query_string = 'sha1_git:' + file_info['target'] - content_data = request_content(query_string, - raise_if_unavailable=False) + query_string = "sha1_git:" + file_info["target"] + content_data = request_content(query_string, raise_if_unavailable=False) else: - dir_id = revision['directory'] + dir_id = revision["directory"] if dir_id: - path = '' if path is None else (path + '/') + path = "" if path is None else (path + "/") dirs, files = get_directory_entries(dir_id) except Exception as exc: return handle_view_exception(request, exc) revision_data = {} - revision_data['author'] = 'None' - if revision['author']: - author_link = gen_person_mail_link(revision['author']) - revision_data['author'] = author_link - revision_data['committer'] = 'None' - if revision['committer']: - committer_link = gen_person_mail_link(revision['committer']) - revision_data['committer'] = committer_link - revision_data['committer date'] = \ - format_utc_iso_date(revision['committer_date']) - revision_data['date'] = format_utc_iso_date(revision['date']) - revision_data['directory'] = revision['directory'] + revision_data["author"] = "None" + if revision["author"]: + author_link = gen_person_mail_link(revision["author"]) + revision_data["author"] = author_link + revision_data["committer"] = "None" + if revision["committer"]: + committer_link = gen_person_mail_link(revision["committer"]) + revision_data["committer"] = committer_link + revision_data["committer date"] = format_utc_iso_date(revision["committer_date"]) + revision_data["date"] = format_utc_iso_date(revision["date"]) + revision_data["directory"] = revision["directory"] if snapshot_context: - revision_data['snapshot'] = snapshot_id - browse_snapshot_link = \ - gen_snapshot_link(snapshot_id) - revision_data['context-independent snapshot'] = browse_snapshot_link - - revision_data['context-independent directory'] = \ - gen_directory_link(revision['directory']) - revision_data['revision'] = sha1_git - revision_data['merge'] = revision['merge'] - revision_data['metadata'] = escape(json.dumps(revision['metadata'], - sort_keys=True, - indent=4, separators=(',', ': '))) + revision_data["snapshot"] = snapshot_id + browse_snapshot_link = gen_snapshot_link(snapshot_id) + revision_data["context-independent snapshot"] = browse_snapshot_link + + revision_data["context-independent directory"] = gen_directory_link( + revision["directory"] + ) + revision_data["revision"] = sha1_git + revision_data["merge"] = revision["merge"] + revision_data["metadata"] = escape( + json.dumps( + revision["metadata"], sort_keys=True, indent=4, separators=(",", ": ") + ) + ) if origin_info: - revision_data['origin url'] = gen_link(origin_info['url'], - origin_info['url']) - revision_data['context-independent revision'] = 
\
- gen_revision_link(sha1_git)
+ revision_data["origin url"] = gen_link(origin_info["url"], origin_info["url"])
+ revision_data["context-independent revision"] = gen_revision_link(sha1_git)
- parents = ''
- for p in revision['parents']:
- parent_link = gen_revision_link(p, link_text=None, link_attrs=None,
- snapshot_context=snapshot_context)
- parents += parent_link + '<br/>'
+ parents = ""
+ for p in revision["parents"]:
+ parent_link = gen_revision_link(
+ p, link_text=None, link_attrs=None, snapshot_context=snapshot_context
+ )
+ parents += parent_link + "<br/>"
- revision_data['parents'] = mark_safe(parents)
- revision_data['synthetic'] = revision['synthetic']
- revision_data['type'] = revision['type']
+ revision_data["parents"] = mark_safe(parents)
+ revision_data["synthetic"] = revision["synthetic"]
+ revision_data["type"] = revision["type"]
- message_lines = ['None']
- if revision['message']:
- message_lines = revision['message'].split('\n')
+ message_lines = ["None"]
+ if revision["message"]:
+ message_lines = revision["message"].split("\n")
 parents = []
- for p in revision['parents']:
+ for p in revision["parents"]:
 parent_url = gen_revision_url(p, snapshot_context)
- parents.append({'id': p, 'url': parent_url})
+ parents.append({"id": p, "url": parent_url})
 path_info = gen_path_info(path)
- query_params = {'snapshot_id': snapshot_id,
- 'origin': origin_url,
- 'timestamp': timestamp,
- 'visit_id': visit_id}
+ query_params = {
+ "snapshot_id": snapshot_id,
+ "origin": origin_url,
+ "timestamp": timestamp,
+ "visit_id": visit_id,
+ }
 breadcrumbs = []
- breadcrumbs.append({'name': revision['directory'][:7],
- 'url': reverse('browse-revision',
- url_args={'sha1_git': sha1_git},
- query_params=query_params)})
+ breadcrumbs.append(
+ {
+ "name": revision["directory"][:7],
+ "url": reverse(
+ "browse-revision",
+ url_args={"sha1_git": sha1_git},
+ query_params=query_params,
+ ),
+ }
+ )
 for pi in path_info:
- query_params['path'] = pi['path']
- breadcrumbs.append({'name': pi['name'],
- 'url': reverse('browse-revision',
- url_args={'sha1_git': sha1_git},
- query_params=query_params)})
+ query_params["path"] = pi["path"]
+ breadcrumbs.append(
+ {
+ "name": pi["name"],
+ "url": reverse(
+ "browse-revision",
+ url_args={"sha1_git": sha1_git},
+ query_params=query_params,
+ ),
+ }
+ )
 vault_cooking = {
- 'directory_context': False,
- 'directory_id': None,
- 'revision_context': True,
- 'revision_id': sha1_git
+ "directory_context": False,
+ "directory_id": None,
+ "revision_context": True,
+ "revision_id": sha1_git,
 }
- swh_objects = [{'type': 'revision',
- 'id': sha1_git}]
+ swh_objects = [{"type": "revision", "id": sha1_git}]
 content = None
 content_size = None
@@ -407,119 +459,135 @@
 readme_html = None
 readmes = {}
 error_code = 200
- error_message = ''
- error_description = ''
+ error_message = ""
+ error_description = ""
 if content_data:
- breadcrumbs[-1]['url'] = None
- content_size = content_data['length']
- mimetype = content_data['mimetype']
- if content_data['raw_data']:
+ breadcrumbs[-1]["url"] = None
+ content_size = content_data["length"]
+ mimetype = content_data["mimetype"]
+ if content_data["raw_data"]:
 content_display_data = prepare_content_for_display(
- content_data['raw_data'], content_data['mimetype'], path)
- content = content_display_data['content_data']
- language = content_display_data['language']
- mimetype = content_display_data['mimetype']
+ content_data["raw_data"], content_data["mimetype"], path
+ )
+ content = content_display_data["content_data"]
+ language = content_display_data["language"]
+ mimetype = content_display_data["mimetype"]
 query_params = {}
 if path:
- filename = path_info[-1]['name']
- query_params['filename'] = path_info[-1]['name']
- revision_data['filename'] = filename
+ filename = path_info[-1]["name"]
+ query_params["filename"] = path_info[-1]["name"]
+ revision_data["filename"] = filename
 top_right_link = {
- 'url': reverse('browse-content-raw',
- url_args={'query_string': query_string},
- query_params=query_params),
- 'icon': swh_object_icons['content'],
- 'text': 'Raw File'
+ "url": reverse(
+
"browse-content-raw", + url_args={"query_string": query_string}, + query_params=query_params, + ), + "icon": swh_object_icons["content"], + "text": "Raw File", } - swh_objects.append({'type': 'content', - 'id': file_info['target']}) + swh_objects.append({"type": "content", "id": file_info["target"]}) - error_code = content_data['error_code'] - error_message = content_data['error_message'] - error_description = content_data['error_description'] + error_code = content_data["error_code"] + error_message = content_data["error_message"] + error_description = content_data["error_description"] else: for d in dirs: - if d['type'] == 'rev': - d['url'] = reverse('browse-revision', - url_args={'sha1_git': d['target']}) + if d["type"] == "rev": + d["url"] = reverse( + "browse-revision", url_args={"sha1_git": d["target"]} + ) else: - query_params['path'] = path + d['name'] - d['url'] = reverse('browse-revision', - url_args={'sha1_git': sha1_git}, - query_params=query_params) + query_params["path"] = path + d["name"] + d["url"] = reverse( + "browse-revision", + url_args={"sha1_git": sha1_git}, + query_params=query_params, + ) for f in files: - query_params['path'] = path + f['name'] - f['url'] = reverse('browse-revision', - url_args={'sha1_git': sha1_git}, - query_params=query_params) - if f['length'] is not None: - f['length'] = filesizeformat(f['length']) - if f['name'].lower().startswith('readme'): - readmes[f['name']] = f['checksums']['sha1'] + query_params["path"] = path + f["name"] + f["url"] = reverse( + "browse-revision", + url_args={"sha1_git": sha1_git}, + query_params=query_params, + ) + if f["length"] is not None: + f["length"] = filesizeformat(f["length"]) + if f["name"].lower().startswith("readme"): + readmes[f["name"]] = f["checksums"]["sha1"] readme_name, readme_url, readme_html = get_readme_to_display(readmes) top_right_link = { - 'url': get_revision_log_url(sha1_git, snapshot_context), - 'icon': swh_object_icons['revisions history'], - 'text': 'History' + "url": get_revision_log_url(sha1_git, snapshot_context), + "icon": swh_object_icons["revisions history"], + "text": "History", } - vault_cooking['directory_context'] = True - vault_cooking['directory_id'] = dir_id + vault_cooking["directory_context"] = True + vault_cooking["directory_id"] = dir_id - swh_objects.append({'type': 'directory', - 'id': dir_id}) + swh_objects.append({"type": "directory", "id": dir_id}) - diff_revision_url = reverse('diff-revision', - url_args={'sha1_git': sha1_git}, - query_params={'origin': origin_url, - 'timestamp': timestamp, - 'visit_id': visit_id}) + diff_revision_url = reverse( + "diff-revision", + url_args={"sha1_git": sha1_git}, + query_params={ + "origin": origin_url, + "timestamp": timestamp, + "visit_id": visit_id, + }, + ) if snapshot_id: - swh_objects.append({'type': 'snapshot', - 'id': snapshot_id}) + swh_objects.append({"type": "snapshot", "id": snapshot_id}) swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) - heading = 'Revision - %s - %s' %\ - (sha1_git[:7], textwrap.shorten(message_lines[0], width=70)) + heading = "Revision - %s - %s" % ( + sha1_git[:7], + textwrap.shorten(message_lines[0], width=70), + ) if snapshot_context: - context_found = 'snapshot: %s' % snapshot_context['snapshot_id'] + context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: - context_found = 'origin: %s' % origin_info['url'] - heading += ' - %s' % context_found - - return render(request, 'browse/revision.html', - {'heading': heading, - 'swh_object_id': swh_ids[0]['swh_id'], - 
'swh_object_name': 'Revision',
- 'swh_object_metadata': revision_data,
- 'message_header': message_lines[0],
- 'message_body': '\n'.join(message_lines[1:]),
- 'parents': parents,
- 'snapshot_context': snapshot_context,
- 'dirs': dirs,
- 'files': files,
- 'content': content,
- 'content_size': content_size,
- 'max_content_size': content_display_max_size,
- 'mimetype': mimetype,
- 'language': language,
- 'readme_name': readme_name,
- 'readme_url': readme_url,
- 'readme_html': readme_html,
- 'breadcrumbs': breadcrumbs,
- 'top_right_link': top_right_link,
- 'vault_cooking': vault_cooking,
- 'diff_revision_url': diff_revision_url,
- 'show_actions_menu': True,
- 'swh_ids': swh_ids,
- 'error_code': error_code,
- 'error_message': error_message,
- 'error_description': error_description},
- status=error_code)
+ context_found = "origin: %s" % origin_info["url"]
+ heading += " - %s" % context_found
+
+ return render(
+ request,
+ "browse/revision.html",
+ {
+ "heading": heading,
+ "swh_object_id": swh_ids[0]["swh_id"],
+ "swh_object_name": "Revision",
+ "swh_object_metadata": revision_data,
+ "message_header": message_lines[0],
+ "message_body": "\n".join(message_lines[1:]),
+ "parents": parents,
+ "snapshot_context": snapshot_context,
+ "dirs": dirs,
+ "files": files,
+ "content": content,
+ "content_size": content_size,
+ "max_content_size": content_display_max_size,
+ "mimetype": mimetype,
+ "language": language,
+ "readme_name": readme_name,
+ "readme_url": readme_url,
+ "readme_html": readme_html,
+ "breadcrumbs": breadcrumbs,
+ "top_right_link": top_right_link,
+ "vault_cooking": vault_cooking,
+ "diff_revision_url": diff_revision_url,
+ "show_actions_menu": True,
+ "swh_ids": swh_ids,
+ "error_code": error_code,
+ "error_message": error_message,
+ "error_description": error_description,
+ },
+ status=error_code,
+ )
diff --git a/swh/web/browse/views/snapshot.py b/swh/web/browse/views/snapshot.py
--- a/swh/web/browse/views/snapshot.py
+++ b/swh/web/browse/views/snapshot.py
@@ -10,30 +10,38 @@
 from swh.web.common.utils import reverse
 from .utils.snapshot_context import (
- browse_snapshot_directory, browse_snapshot_content,
- browse_snapshot_log, browse_snapshot_branches,
- browse_snapshot_releases
+ browse_snapshot_directory,
+ browse_snapshot_content,
+ browse_snapshot_log,
+ browse_snapshot_branches,
+ browse_snapshot_releases,
)
-@browse_route(r'snapshot/(?P<snapshot_id>[0-9a-f]+)/',
- view_name='browse-snapshot',
- checksum_args=['snapshot_id'])
+@browse_route(
+ r"snapshot/(?P<snapshot_id>[0-9a-f]+)/",
+ view_name="browse-snapshot",
+ checksum_args=["snapshot_id"],
+)
 def snapshot_browse(request, snapshot_id):
 """Django view for browsing the content of a snapshot.
 The url that points to it is :http:get:`/browse/snapshot/(snapshot_id)/`
 """
- browse_snapshot_url = reverse('browse-snapshot-directory',
- url_args={'snapshot_id': snapshot_id},
- query_params=request.GET)
+ browse_snapshot_url = reverse(
+ "browse-snapshot-directory",
+ url_args={"snapshot_id": snapshot_id},
+ query_params=request.GET,
+ )
 return redirect(browse_snapshot_url)
-@browse_route(r'snapshot/(?P<snapshot_id>[0-9a-f]+)/directory/',
- r'snapshot/(?P<snapshot_id>[0-9a-f]+)/directory/(?P<path>.+)/',
- view_name='browse-snapshot-directory',
- checksum_args=['snapshot_id'])
+@browse_route(
+ r"snapshot/(?P<snapshot_id>[0-9a-f]+)/directory/",
+ r"snapshot/(?P<snapshot_id>[0-9a-f]+)/directory/(?P<path>.+)/",
+ view_name="browse-snapshot-directory",
+ checksum_args=["snapshot_id"],
+)
 def snapshot_directory_browse(request, snapshot_id, path=None):
 """Django view for browsing the content of a directory collected
 in a snapshot.
@@ -41,16 +49,19 @@
 The url that points to it is
 :http:get:`/browse/snapshot/(snapshot_id)/directory/[(path)/]`
 """
- origin_url = request.GET.get('origin_url', None)
+ origin_url = request.GET.get("origin_url", None)
 if not origin_url:
- origin_url = request.GET.get('origin', None)
- return browse_snapshot_directory(request, snapshot_id=snapshot_id,
- path=path, origin_url=origin_url)
+ origin_url = request.GET.get("origin", None)
+ return browse_snapshot_directory(
+ request, snapshot_id=snapshot_id, path=path, origin_url=origin_url
+ )
-@browse_route(r'snapshot/(?P<snapshot_id>[0-9a-f]+)/content/(?P<path>.+)/',
- view_name='browse-snapshot-content',
- checksum_args=['snapshot_id'])
+@browse_route(
+ r"snapshot/(?P<snapshot_id>[0-9a-f]+)/content/(?P<path>.+)/",
+ view_name="browse-snapshot-content",
+ checksum_args=["snapshot_id"],
+)
 def snapshot_content_browse(request, snapshot_id, path):
 """Django view that produces an HTML display of a content
 collected in a snapshot.
@@ -58,14 +69,17 @@
 The url that points to it is
 :http:get:`/browse/snapshot/(snapshot_id)/content/(path)/`
 """
- language = request.GET.get('language', None)
- return browse_snapshot_content(request, snapshot_id=snapshot_id, path=path,
- selected_language=language)
+ language = request.GET.get("language", None)
+ return browse_snapshot_content(
+ request, snapshot_id=snapshot_id, path=path, selected_language=language
+ )
-@browse_route(r'snapshot/(?P<snapshot_id>[0-9a-f]+)/log/',
- view_name='browse-snapshot-log',
- checksum_args=['snapshot_id'])
+@browse_route(
+ r"snapshot/(?P<snapshot_id>[0-9a-f]+)/log/",
+ view_name="browse-snapshot-log",
+ checksum_args=["snapshot_id"],
+)
 def snapshot_log_browse(request, snapshot_id):
 """Django view that produces an HTML display of revisions history
 (aka the commit log) collected in a snapshot.
@@ -76,9 +90,11 @@
 return browse_snapshot_log(request, snapshot_id=snapshot_id)
-@browse_route(r'snapshot/(?P<snapshot_id>[0-9a-f]+)/branches/',
- view_name='browse-snapshot-branches',
- checksum_args=['snapshot_id'])
+@browse_route(
+ r"snapshot/(?P<snapshot_id>[0-9a-f]+)/branches/",
+ view_name="browse-snapshot-branches",
+ checksum_args=["snapshot_id"],
+)
 def snapshot_branches_browse(request, snapshot_id):
 """Django view that produces an HTML display of the list of releases
 collected in a snapshot.
@@ -89,9 +105,11 @@
 return browse_snapshot_branches(request, snapshot_id=snapshot_id)
-@browse_route(r'snapshot/(?P<snapshot_id>[0-9a-f]+)/releases/',
- view_name='browse-snapshot-releases',
- checksum_args=['snapshot_id'])
+@browse_route(
+ r"snapshot/(?P<snapshot_id>[0-9a-f]+)/releases/",
+ view_name="browse-snapshot-releases",
+ checksum_args=["snapshot_id"],
+)
 def snapshot_releases_browse(request, snapshot_id):
 """Django view that produces an HTML display of the list of releases
 collected in a snapshot.
diff --git a/swh/web/browse/views/utils/snapshot_context.py b/swh/web/browse/views/utils/snapshot_context.py
--- a/swh/web/browse/views/utils/snapshot_context.py
+++ b/swh/web/browse/views/utils/snapshot_context.py
@@ -16,23 +16,33 @@
 from swh.model.identifiers import snapshot_identifier
 from swh.web.browse.utils import (
- get_snapshot_context, get_directory_entries, gen_directory_link,
- gen_revision_link, request_content, gen_content_link,
- prepare_content_for_display, content_display_max_size,
- format_log_entries, gen_revision_log_link, gen_release_link,
- get_readme_to_display, get_swh_persistent_ids,
- gen_snapshot_link, process_snapshot_branches
+ get_snapshot_context,
+ get_directory_entries,
+ gen_directory_link,
+ gen_revision_link,
+ request_content,
+ gen_content_link,
+ prepare_content_for_display,
+ content_display_max_size,
+ format_log_entries,
+ gen_revision_log_link,
+ gen_release_link,
+ get_readme_to_display,
+ get_swh_persistent_ids,
+ gen_snapshot_link,
+ process_snapshot_branches,
)
 from swh.web.common import service, highlightjs
-from swh.web.common.exc import (
- handle_view_exception, NotFoundExc
-)
+from swh.web.common.exc import handle_view_exception, NotFoundExc
 from swh.web.common.utils import (
- reverse, gen_path_info, format_utc_iso_date, swh_object_icons
+ reverse,
+ gen_path_info,
+ format_utc_iso_date,
+ swh_object_icons,
)
-_empty_snapshot_id = snapshot_identifier({'branches': {}})
+_empty_snapshot_id = snapshot_identifier({"branches": {}})
 def _get_branch(branches, branch_name, snapshot_id):
@@ -42,24 +52,25 @@
 (e.g those with svn type) does not have it. In that latter case, check
 if there is a master branch instead and returns it.
 """
- filtered_branches = [b for b in branches if b['name'] == branch_name]
+ filtered_branches = [b for b in branches if b["name"] == branch_name]
 if filtered_branches:
 return filtered_branches[0]
- elif branch_name == 'HEAD':
- filtered_branches = [b for b in branches
- if b['name'].endswith('master')]
+ elif branch_name == "HEAD":
+ filtered_branches = [b for b in branches if b["name"].endswith("master")]
 if filtered_branches:
 return filtered_branches[0]
 elif branches:
 return branches[0]
 else:
 # case where a large branches list has been truncated
- snp = service.lookup_snapshot(snapshot_id,
- branches_from=branch_name,
- branches_count=1,
- target_types=['revision', 'alias'])
+ snp = service.lookup_snapshot(
+ snapshot_id,
+ branches_from=branch_name,
+ branches_count=1,
+ target_types=["revision", "alias"],
+ )
 snp_branch, _ = process_snapshot_branches(snp)
- if snp_branch and snp_branch[0]['name'] == branch_name:
+ if snp_branch and snp_branch[0]["name"] == branch_name:
 branches.append(snp_branch[0])
 return snp_branch[0]
@@ -69,191 +80,237 @@
 Utility function to get a specific release from a releases list.
 Returns None if the release can not be found in the list.
""" - filtered_releases = [r for r in releases if r['name'] == release_name] + filtered_releases = [r for r in releases if r["name"] == release_name] if filtered_releases: return filtered_releases[0] else: # case where a large branches list has been truncated - for branch_name in (release_name, f'refs/tags/{release_name}'): - snp = service.lookup_snapshot(snapshot_id, - branches_from=branch_name, - branches_count=1, - target_types=['release']) + for branch_name in (release_name, f"refs/tags/{release_name}"): + snp = service.lookup_snapshot( + snapshot_id, + branches_from=branch_name, + branches_count=1, + target_types=["release"], + ) _, snp_release = process_snapshot_branches(snp) - if snp_release and snp_release[0]['name'] == release_name: + if snp_release and snp_release[0]["name"] == release_name: releases.append(snp_release[0]) return snp_release[0] -def _branch_not_found(branch_type, branch, snapshot_id, snapshot_sizes, - origin_info, timestamp, visit_id): +def _branch_not_found( + branch_type, branch, snapshot_id, snapshot_sizes, origin_info, timestamp, visit_id +): """ Utility function to raise an exception when a specified branch/release can not be found. """ - if branch_type == 'branch': - branch_type = 'Branch' - branch_type_plural = 'branches' - target_type = 'revision' + if branch_type == "branch": + branch_type = "Branch" + branch_type_plural = "branches" + target_type = "revision" else: - branch_type = 'Release' - branch_type_plural = 'releases' - target_type = 'release' + branch_type = "Release" + branch_type_plural = "releases" + target_type = "release" if snapshot_id and snapshot_sizes[target_type] == 0: - msg = ('Snapshot with id %s has an empty list' - ' of %s!' % (snapshot_id, branch_type_plural)) + msg = "Snapshot with id %s has an empty list" " of %s!" % ( + snapshot_id, + branch_type_plural, + ) elif snapshot_id: - msg = ('%s %s for snapshot with id %s' - ' not found!' % (branch_type, branch, snapshot_id)) + msg = "%s %s for snapshot with id %s" " not found!" % ( + branch_type, + branch, + snapshot_id, + ) elif visit_id and snapshot_sizes[target_type] == 0: - msg = ('Origin with url %s' - ' for visit with id %s has an empty list' - ' of %s!' % (origin_info['url'], visit_id, - branch_type_plural)) + msg = ( + "Origin with url %s" + " for visit with id %s has an empty list" + " of %s!" % (origin_info["url"], visit_id, branch_type_plural) + ) elif visit_id: - msg = ('%s %s associated to visit with' - ' id %s for origin with url %s' - ' not found!' % (branch_type, branch, visit_id, - origin_info['url'])) + msg = ( + "%s %s associated to visit with" + " id %s for origin with url %s" + " not found!" % (branch_type, branch, visit_id, origin_info["url"]) + ) elif snapshot_sizes[target_type] == 0: - msg = ('Origin with url %s' - ' for visit with timestamp %s has an empty list' - ' of %s!' % (origin_info['url'], - timestamp, branch_type_plural)) + msg = ( + "Origin with url %s" + " for visit with timestamp %s has an empty list" + " of %s!" % (origin_info["url"], timestamp, branch_type_plural) + ) else: - msg = ('%s %s associated to visit with' - ' timestamp %s for origin with ' - 'url %s not found!' % (branch_type, branch, timestamp, - origin_info['url'])) + msg = ( + "%s %s associated to visit with" + " timestamp %s for origin with " + "url %s not found!" 
% (branch_type, branch, timestamp, origin_info["url"]) + ) raise NotFoundExc(escape(msg)) -def _process_snapshot_request(request, snapshot_id=None, - origin_url=None, timestamp=None, path=None, - browse_context='directory'): +def _process_snapshot_request( + request, + snapshot_id=None, + origin_url=None, + timestamp=None, + path=None, + browse_context="directory", +): """ Utility function to perform common input request processing for snapshot context views. """ - visit_id = request.GET.get('visit_id', None) + visit_id = request.GET.get("visit_id", None) - snapshot_context = get_snapshot_context(snapshot_id, - origin_url, timestamp, visit_id) + snapshot_context = get_snapshot_context( + snapshot_id, origin_url, timestamp, visit_id + ) - swh_type = snapshot_context['swh_type'] - origin_info = snapshot_context['origin_info'] - branches = snapshot_context['branches'] - releases = snapshot_context['releases'] - url_args = snapshot_context['url_args'] - query_params = snapshot_context['query_params'] + swh_type = snapshot_context["swh_type"] + origin_info = snapshot_context["origin_info"] + branches = snapshot_context["branches"] + releases = snapshot_context["releases"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] - if snapshot_context['visit_info']: - timestamp = format_utc_iso_date(snapshot_context['visit_info']['date'], - '%Y-%m-%dT%H:%M:%SZ') - snapshot_context['timestamp'] = format_utc_iso_date( - snapshot_context['visit_info']['date']) + if snapshot_context["visit_info"]: + timestamp = format_utc_iso_date( + snapshot_context["visit_info"]["date"], "%Y-%m-%dT%H:%M:%SZ" + ) + snapshot_context["timestamp"] = format_utc_iso_date( + snapshot_context["visit_info"]["date"] + ) - browse_view_name = 'browse-' + swh_type + '-' + browse_context + browse_view_name = "browse-" + swh_type + "-" + browse_context root_sha1_git = None - revision_id = request.GET.get('revision', None) - release_name = request.GET.get('release', None) + revision_id = request.GET.get("revision", None) + release_name = request.GET.get("release", None) release_id = None branch_name = None - snapshot_sizes = snapshot_context['snapshot_sizes'] + snapshot_sizes = snapshot_context["snapshot_sizes"] snapshot_total_size = sum(snapshot_sizes.values()) if snapshot_total_size and revision_id: revision = service.lookup_revision(revision_id) - root_sha1_git = revision['directory'] - branches.append({'name': revision_id, - 'revision': revision_id, - 'directory': root_sha1_git, - 'url': None}) + root_sha1_git = revision["directory"] + branches.append( + { + "name": revision_id, + "revision": revision_id, + "directory": root_sha1_git, + "url": None, + } + ) branch_name = revision_id - query_params['revision'] = revision_id + query_params["revision"] = revision_id elif snapshot_total_size and release_name: - release = _get_release(releases, release_name, - snapshot_context['snapshot_id']) + release = _get_release(releases, release_name, snapshot_context["snapshot_id"]) try: - root_sha1_git = release['directory'] - revision_id = release['target'] - release_id = release['id'] - query_params['release'] = release_name + root_sha1_git = release["directory"] + revision_id = release["target"] + release_id = release["id"] + query_params["release"] = release_name except Exception as exc: sentry_sdk.capture_exception(exc) - _branch_not_found('release', release_name, snapshot_id, - snapshot_sizes, origin_info, timestamp, visit_id) + _branch_not_found( + "release", + release_name, + snapshot_id, + 
snapshot_sizes, + origin_info, + timestamp, + visit_id, + ) elif snapshot_total_size: - branch_name = request.GET.get('branch', None) + branch_name = request.GET.get("branch", None) if branch_name: - query_params['branch'] = branch_name - branch = _get_branch(branches, branch_name or 'HEAD', - snapshot_context['snapshot_id']) + query_params["branch"] = branch_name + branch = _get_branch( + branches, branch_name or "HEAD", snapshot_context["snapshot_id"] + ) try: - branch_name = branch['name'] - revision_id = branch['revision'] - root_sha1_git = branch['directory'] + branch_name = branch["name"] + revision_id = branch["revision"] + root_sha1_git = branch["directory"] except Exception as exc: sentry_sdk.capture_exception(exc) - _branch_not_found('branch', branch_name, snapshot_id, - snapshot_sizes, origin_info, timestamp, visit_id) + _branch_not_found( + "branch", + branch_name, + snapshot_id, + snapshot_sizes, + origin_info, + timestamp, + visit_id, + ) for b in branches: branch_url_args = dict(url_args) branch_query_params = dict(query_params) - if 'release' in branch_query_params: - del branch_query_params['release'] - branch_query_params['branch'] = b['name'] + if "release" in branch_query_params: + del branch_query_params["release"] + branch_query_params["branch"] = b["name"] if path: - b['path'] = path - branch_url_args['path'] = path - b['url'] = reverse(browse_view_name, - url_args=branch_url_args, - query_params=branch_query_params) + b["path"] = path + branch_url_args["path"] = path + b["url"] = reverse( + browse_view_name, url_args=branch_url_args, query_params=branch_query_params + ) for r in releases: release_url_args = dict(url_args) release_query_params = dict(query_params) - if 'branch' in release_query_params: - del release_query_params['branch'] - release_query_params['release'] = r['name'] + if "branch" in release_query_params: + del release_query_params["branch"] + release_query_params["release"] = r["name"] if path: - r['path'] = path - release_url_args['path'] = path - r['url'] = reverse(browse_view_name, - url_args=release_url_args, - query_params=release_query_params) - - snapshot_context['query_params'] = query_params - snapshot_context['root_sha1_git'] = root_sha1_git - snapshot_context['revision_id'] = revision_id - snapshot_context['branch'] = branch_name - snapshot_context['release'] = release_name - snapshot_context['release_id'] = release_id + r["path"] = path + release_url_args["path"] = path + r["url"] = reverse( + browse_view_name, + url_args=release_url_args, + query_params=release_query_params, + ) + + snapshot_context["query_params"] = query_params + snapshot_context["root_sha1_git"] = root_sha1_git + snapshot_context["revision_id"] = revision_id + snapshot_context["branch"] = branch_name + snapshot_context["release"] = release_name + snapshot_context["release_id"] = release_id return snapshot_context -def browse_snapshot_directory(request, snapshot_id=None, - origin_url=None, timestamp=None, path=None): +def browse_snapshot_directory( + request, snapshot_id=None, origin_url=None, timestamp=None, path=None +): """ Django view implementation for browsing a directory in a snapshot context. 
""" try: snapshot_context = _process_snapshot_request( - request, snapshot_id, origin_url, - timestamp, path, browse_context='directory') - - root_sha1_git = snapshot_context['root_sha1_git'] + request, + snapshot_id, + origin_url, + timestamp, + path, + browse_context="directory", + ) + + root_sha1_git = snapshot_context["root_sha1_git"] sha1_git = root_sha1_git if root_sha1_git and path: dir_info = service.lookup_directory_with_path(root_sha1_git, path) - sha1_git = dir_info['target'] + sha1_git = dir_info["target"] dirs = [] files = [] @@ -263,72 +320,79 @@ except Exception as exc: return handle_view_exception(request, exc) - swh_type = snapshot_context['swh_type'] - origin_info = snapshot_context['origin_info'] - visit_info = snapshot_context['visit_info'] - url_args = snapshot_context['url_args'] - query_params = snapshot_context['query_params'] - revision_id = snapshot_context['revision_id'] - snapshot_id = snapshot_context['snapshot_id'] + swh_type = snapshot_context["swh_type"] + origin_info = snapshot_context["origin_info"] + visit_info = snapshot_context["visit_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] + revision_id = snapshot_context["revision_id"] + snapshot_id = snapshot_context["snapshot_id"] path_info = gen_path_info(path) - browse_view_name = 'browse-' + swh_type + '-directory' + browse_view_name = "browse-" + swh_type + "-directory" breadcrumbs = [] if root_sha1_git: - breadcrumbs.append({'name': root_sha1_git[:7], - 'url': reverse(browse_view_name, - url_args=url_args, - query_params=query_params)}) + breadcrumbs.append( + { + "name": root_sha1_git[:7], + "url": reverse( + browse_view_name, url_args=url_args, query_params=query_params + ), + } + ) for pi in path_info: bc_url_args = dict(url_args) - bc_url_args['path'] = pi['path'] - breadcrumbs.append({'name': pi['name'], - 'url': reverse(browse_view_name, - url_args=bc_url_args, - query_params=query_params)}) - - path = '' if path is None else (path + '/') + bc_url_args["path"] = pi["path"] + breadcrumbs.append( + { + "name": pi["name"], + "url": reverse( + browse_view_name, url_args=bc_url_args, query_params=query_params + ), + } + ) + + path = "" if path is None else (path + "/") for d in dirs: - if d['type'] == 'rev': - d['url'] = reverse('browse-revision', - url_args={'sha1_git': d['target']}) + if d["type"] == "rev": + d["url"] = reverse("browse-revision", url_args={"sha1_git": d["target"]}) else: bc_url_args = dict(url_args) - bc_url_args['path'] = path + d['name'] - d['url'] = reverse(browse_view_name, - url_args=bc_url_args, - query_params=query_params) + bc_url_args["path"] = path + d["name"] + d["url"] = reverse( + browse_view_name, url_args=bc_url_args, query_params=query_params + ) sum_file_sizes = 0 readmes = {} - browse_view_name = 'browse-' + swh_type + '-content' + browse_view_name = "browse-" + swh_type + "-content" for f in files: bc_url_args = dict(url_args) - bc_url_args['path'] = path + f['name'] - f['url'] = reverse(browse_view_name, - url_args=bc_url_args, - query_params=query_params) - if f['length'] is not None: - sum_file_sizes += f['length'] - f['length'] = filesizeformat(f['length']) - if f['name'].lower().startswith('readme'): - readmes[f['name']] = f['checksums']['sha1'] + bc_url_args["path"] = path + f["name"] + f["url"] = reverse( + browse_view_name, url_args=bc_url_args, query_params=query_params + ) + if f["length"] is not None: + sum_file_sizes += f["length"] + f["length"] = filesizeformat(f["length"]) + if 
f["name"].lower().startswith("readme"): + readmes[f["name"]] = f["checksums"]["sha1"] readme_name, readme_url, readme_html = get_readme_to_display(readmes) - browse_view_name = 'browse-' + swh_type + '-log' + browse_view_name = "browse-" + swh_type + "-log" history_url = None if snapshot_id != _empty_snapshot_id: - history_url = reverse(browse_view_name, - url_args=url_args, - query_params=query_params) + history_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params + ) nb_files = None nb_dirs = None @@ -337,142 +401,151 @@ nb_files = len(files) nb_dirs = len(dirs) sum_file_sizes = filesizeformat(sum_file_sizes) - dir_path = '/' + path + dir_path = "/" + path browse_dir_link = gen_directory_link(sha1_git) browse_rev_link = gen_revision_link(revision_id) browse_snp_link = gen_snapshot_link(snapshot_id) - dir_metadata = {"directory": sha1_git, - "context-independent directory": browse_dir_link, - "number of regular files": nb_files, - "number of subdirectories": nb_dirs, - "sum of regular file sizes": sum_file_sizes, - "path": dir_path, - "revision": revision_id, - "context-independent revision": browse_rev_link, - "snapshot": snapshot_id, - "context-independent snapshot": browse_snp_link} + dir_metadata = { + "directory": sha1_git, + "context-independent directory": browse_dir_link, + "number of regular files": nb_files, + "number of subdirectories": nb_dirs, + "sum of regular file sizes": sum_file_sizes, + "path": dir_path, + "revision": revision_id, + "context-independent revision": browse_rev_link, + "snapshot": snapshot_id, + "context-independent snapshot": browse_snp_link, + } if origin_info: - dir_metadata['origin url'] = origin_info['url'] - dir_metadata['origin visit date'] = format_utc_iso_date( - visit_info['date']) - dir_metadata['origin visit type'] = visit_info['type'] + dir_metadata["origin url"] = origin_info["url"] + dir_metadata["origin visit date"] = format_utc_iso_date(visit_info["date"]) + dir_metadata["origin visit type"] = visit_info["type"] vault_cooking = { - 'directory_context': True, - 'directory_id': sha1_git, - 'revision_context': True, - 'revision_id': revision_id + "directory_context": True, + "directory_id": sha1_git, + "revision_context": True, + "revision_id": revision_id, } - swh_objects = [{'type': 'directory', - 'id': sha1_git}, - {'type': 'revision', - 'id': revision_id}, - {'type': 'snapshot', - 'id': snapshot_id}] + swh_objects = [ + {"type": "directory", "id": sha1_git}, + {"type": "revision", "id": revision_id}, + {"type": "snapshot", "id": snapshot_id}, + ] - release_id = snapshot_context['release_id'] + release_id = snapshot_context["release_id"] if release_id: - swh_objects.append({'type': 'release', - 'id': release_id}) + swh_objects.append({"type": "release", "id": release_id}) browse_rel_link = gen_release_link(release_id) - dir_metadata['release'] = release_id - dir_metadata['context-independent release'] = browse_rel_link + dir_metadata["release"] = release_id + dir_metadata["context-independent release"] = browse_rel_link swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) - dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/' - context_found = 'snapshot: %s' % snapshot_context['snapshot_id'] + dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/" + context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: - context_found = 'origin: %s' % origin_info['url'] - heading = ('Directory - %s - %s - %s' % - (dir_path, snapshot_context['branch'], context_found)) + 
context_found = "origin: %s" % origin_info["url"] + heading = "Directory - %s - %s - %s" % ( + dir_path, + snapshot_context["branch"], + context_found, + ) top_right_link = None - if not snapshot_context['is_empty']: + if not snapshot_context["is_empty"]: top_right_link = { - 'url': history_url, - 'icon': swh_object_icons['revisions history'], - 'text': 'History' + "url": history_url, + "icon": swh_object_icons["revisions history"], + "text": "History", } - return render(request, 'browse/directory.html', - {'heading': heading, - 'swh_object_name': 'Directory', - 'swh_object_metadata': dir_metadata, - 'dirs': dirs, - 'files': files, - 'breadcrumbs': breadcrumbs if root_sha1_git else [], - 'top_right_link': top_right_link, - 'readme_name': readme_name, - 'readme_url': readme_url, - 'readme_html': readme_html, - 'snapshot_context': snapshot_context, - 'vault_cooking': vault_cooking, - 'show_actions_menu': True, - 'swh_ids': swh_ids}) - - -def browse_snapshot_content(request, snapshot_id=None, - origin_url=None, timestamp=None, path=None, - selected_language=None): + return render( + request, + "browse/directory.html", + { + "heading": heading, + "swh_object_name": "Directory", + "swh_object_metadata": dir_metadata, + "dirs": dirs, + "files": files, + "breadcrumbs": breadcrumbs if root_sha1_git else [], + "top_right_link": top_right_link, + "readme_name": readme_name, + "readme_url": readme_url, + "readme_html": readme_html, + "snapshot_context": snapshot_context, + "vault_cooking": vault_cooking, + "show_actions_menu": True, + "swh_ids": swh_ids, + }, + ) + + +def browse_snapshot_content( + request, + snapshot_id=None, + origin_url=None, + timestamp=None, + path=None, + selected_language=None, +): """ Django view implementation for browsing a content in a snapshot context. 
""" try: - snapshot_context = _process_snapshot_request(request, snapshot_id, - origin_url, - timestamp, path, - browse_context='content') + snapshot_context = _process_snapshot_request( + request, snapshot_id, origin_url, timestamp, path, browse_context="content" + ) - root_sha1_git = snapshot_context['root_sha1_git'] + root_sha1_git = snapshot_context["root_sha1_git"] sha1_git = None query_string = None content_data = None directory_id = None - split_path = path.split('/') + split_path = path.split("/") filename = split_path[-1] - filepath = path[:-len(filename)] + filepath = path[: -len(filename)] if root_sha1_git: - content_info = service.lookup_directory_with_path(root_sha1_git, - path) - sha1_git = content_info['target'] - query_string = 'sha1_git:' + sha1_git - content_data = request_content(query_string, - raise_if_unavailable=False) + content_info = service.lookup_directory_with_path(root_sha1_git, path) + sha1_git = content_info["target"] + query_string = "sha1_git:" + sha1_git + content_data = request_content(query_string, raise_if_unavailable=False) if filepath: - dir_info = service.lookup_directory_with_path(root_sha1_git, - filepath) - directory_id = dir_info['target'] + dir_info = service.lookup_directory_with_path(root_sha1_git, filepath) + directory_id = dir_info["target"] else: directory_id = root_sha1_git except Exception as exc: return handle_view_exception(request, exc) - swh_type = snapshot_context['swh_type'] - url_args = snapshot_context['url_args'] - query_params = snapshot_context['query_params'] - revision_id = snapshot_context['revision_id'] - origin_info = snapshot_context['origin_info'] - visit_info = snapshot_context['visit_info'] - snapshot_id = snapshot_context['snapshot_id'] + swh_type = snapshot_context["swh_type"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] + revision_id = snapshot_context["revision_id"] + origin_info = snapshot_context["origin_info"] + visit_info = snapshot_context["visit_info"] + snapshot_id = snapshot_context["snapshot_id"] content = None language = None mimetype = None - if content_data and content_data['raw_data'] is not None: + if content_data and content_data["raw_data"] is not None: content_display_data = prepare_content_for_display( - content_data['raw_data'], content_data['mimetype'], path) - content = content_display_data['content_data'] - language = content_display_data['language'] - mimetype = content_display_data['mimetype'] + content_data["raw_data"], content_data["mimetype"], path + ) + content = content_display_data["content_data"] + language = content_display_data["language"] + mimetype = content_display_data["mimetype"] # Override language with user-selected language if selected_language is not None: @@ -480,144 +553,156 @@ available_languages = None - if mimetype and 'text/' in mimetype: + if mimetype and "text/" in mimetype: available_languages = highlightjs.get_supported_languages() - browse_view_name = 'browse-' + swh_type + '-directory' + browse_view_name = "browse-" + swh_type + "-directory" breadcrumbs = [] path_info = gen_path_info(filepath) if root_sha1_git: - breadcrumbs.append({'name': root_sha1_git[:7], - 'url': reverse(browse_view_name, - url_args=url_args, - query_params=query_params)}) + breadcrumbs.append( + { + "name": root_sha1_git[:7], + "url": reverse( + browse_view_name, url_args=url_args, query_params=query_params + ), + } + ) for pi in path_info: bc_url_args = dict(url_args) - bc_url_args['path'] = pi['path'] - breadcrumbs.append({'name': 
pi['name'], - 'url': reverse(browse_view_name, - url_args=bc_url_args, - query_params=query_params)}) - - breadcrumbs.append({'name': filename, - 'url': None}) + bc_url_args["path"] = pi["path"] + breadcrumbs.append( + { + "name": pi["name"], + "url": reverse( + browse_view_name, url_args=bc_url_args, query_params=query_params + ), + } + ) + + breadcrumbs.append({"name": filename, "url": None}) browse_content_link = gen_content_link(sha1_git) content_raw_url = None if query_string: - content_raw_url = reverse('browse-content-raw', - url_args={'query_string': query_string}, - query_params={'filename': filename}) + content_raw_url = reverse( + "browse-content-raw", + url_args={"query_string": query_string}, + query_params={"filename": filename}, + ) browse_rev_link = gen_revision_link(revision_id) browse_dir_link = gen_directory_link(directory_id) content_metadata = { - 'context-independent content': browse_content_link, - 'path': None, - 'filename': None, - 'directory': directory_id, - 'context-independent directory': browse_dir_link, - 'revision': revision_id, - 'context-independent revision': browse_rev_link, - 'snapshot': snapshot_id + "context-independent content": browse_content_link, + "path": None, + "filename": None, + "directory": directory_id, + "context-independent directory": browse_dir_link, + "revision": revision_id, + "context-independent revision": browse_rev_link, + "snapshot": snapshot_id, } cnt_sha1_git = None content_size = None error_code = 200 - error_description = '' - error_message = '' + error_description = "" + error_message = "" if content_data: - for checksum in content_data['checksums'].keys(): - content_metadata[checksum] = content_data['checksums'][checksum] - content_metadata['mimetype'] = content_data['mimetype'] - content_metadata['encoding'] = content_data['encoding'] - content_metadata['size'] = filesizeformat(content_data['length']) - content_metadata['language'] = content_data['language'] - content_metadata['licenses'] = content_data['licenses'] - content_metadata['path'] = '/' + filepath - content_metadata['filename'] = filename - - cnt_sha1_git = content_data['checksums']['sha1_git'] - content_size = content_data['length'] - error_code = content_data['error_code'] - error_message = content_data['error_message'] - error_description = content_data['error_description'] + for checksum in content_data["checksums"].keys(): + content_metadata[checksum] = content_data["checksums"][checksum] + content_metadata["mimetype"] = content_data["mimetype"] + content_metadata["encoding"] = content_data["encoding"] + content_metadata["size"] = filesizeformat(content_data["length"]) + content_metadata["language"] = content_data["language"] + content_metadata["licenses"] = content_data["licenses"] + content_metadata["path"] = "/" + filepath + content_metadata["filename"] = filename + + cnt_sha1_git = content_data["checksums"]["sha1_git"] + content_size = content_data["length"] + error_code = content_data["error_code"] + error_message = content_data["error_message"] + error_description = content_data["error_description"] if origin_info: - content_metadata['origin url'] = origin_info['url'] - content_metadata['origin visit date'] = format_utc_iso_date( - visit_info['date']) - content_metadata['origin visit type'] = visit_info['type'] + content_metadata["origin url"] = origin_info["url"] + content_metadata["origin visit date"] = format_utc_iso_date(visit_info["date"]) + content_metadata["origin visit type"] = visit_info["type"] browse_snapshot_link = 
gen_snapshot_link(snapshot_id) - content_metadata['context-independent snapshot'] = browse_snapshot_link - - swh_objects = [{'type': 'content', - 'id': cnt_sha1_git}, - {'type': 'directory', - 'id': directory_id}, - {'type': 'revision', - 'id': revision_id}, - {'type': 'snapshot', - 'id': snapshot_id}] - - release_id = snapshot_context['release_id'] + content_metadata["context-independent snapshot"] = browse_snapshot_link + + swh_objects = [ + {"type": "content", "id": cnt_sha1_git}, + {"type": "directory", "id": directory_id}, + {"type": "revision", "id": revision_id}, + {"type": "snapshot", "id": snapshot_id}, + ] + + release_id = snapshot_context["release_id"] if release_id: - swh_objects.append({'type': 'release', - 'id': release_id}) + swh_objects.append({"type": "release", "id": release_id}) browse_rel_link = gen_release_link(release_id) - content_metadata['release'] = release_id - content_metadata['context-independent release'] = browse_rel_link + content_metadata["release"] = release_id + content_metadata["context-independent release"] = browse_rel_link swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) - content_path = '/'.join([bc['name'] for bc in breadcrumbs]) - context_found = 'snapshot: %s' % snapshot_context['snapshot_id'] + content_path = "/".join([bc["name"] for bc in breadcrumbs]) + context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: - context_found = 'origin: %s' % origin_info['url'] - heading = ('Content - %s - %s - %s' % - (content_path, snapshot_context['branch'], context_found)) + context_found = "origin: %s" % origin_info["url"] + heading = "Content - %s - %s - %s" % ( + content_path, + snapshot_context["branch"], + context_found, + ) top_right_link = None - if not snapshot_context['is_empty']: + if not snapshot_context["is_empty"]: top_right_link = { - 'url': content_raw_url, - 'icon': swh_object_icons['content'], - 'text': 'Raw File' + "url": content_raw_url, + "icon": swh_object_icons["content"], + "text": "Raw File", } - return render(request, 'browse/content.html', - {'heading': heading, - 'swh_object_name': 'Content', - 'swh_object_metadata': content_metadata, - 'content': content, - 'content_size': content_size, - 'max_content_size': content_display_max_size, - 'mimetype': mimetype, - 'language': language, - 'available_languages': available_languages, - 'breadcrumbs': breadcrumbs if root_sha1_git else [], - 'top_right_link': top_right_link, - 'snapshot_context': snapshot_context, - 'vault_cooking': None, - 'show_actions_menu': True, - 'swh_ids': swh_ids, - 'error_code': error_code, - 'error_message': error_message, - 'error_description': error_description}, - status=error_code) + return render( + request, + "browse/content.html", + { + "heading": heading, + "swh_object_name": "Content", + "swh_object_metadata": content_metadata, + "content": content, + "content_size": content_size, + "max_content_size": content_display_max_size, + "mimetype": mimetype, + "language": language, + "available_languages": available_languages, + "breadcrumbs": breadcrumbs if root_sha1_git else [], + "top_right_link": top_right_link, + "snapshot_context": snapshot_context, + "vault_cooking": None, + "show_actions_menu": True, + "swh_ids": swh_ids, + "error_code": error_code, + "error_message": error_message, + "error_description": error_description, + }, + status=error_code, + ) PER_PAGE = 100 -def browse_snapshot_log(request, snapshot_id=None, - origin_url=None, timestamp=None): +def browse_snapshot_log(request, snapshot_id=None, 
origin_url=None, timestamp=None): """ Django view implementation for browsing a revision history in a snapshot context. @@ -625,69 +710,71 @@ try: snapshot_context = _process_snapshot_request( - request, snapshot_id, origin_url, timestamp, browse_context='log') + request, snapshot_id, origin_url, timestamp, browse_context="log" + ) - revision_id = snapshot_context['revision_id'] + revision_id = snapshot_context["revision_id"] - per_page = int(request.GET.get('per_page', PER_PAGE)) - offset = int(request.GET.get('offset', 0)) - revs_ordering = request.GET.get('revs_ordering', 'committer_date') - session_key = 'rev_%s_log_ordering_%s' % (revision_id, revs_ordering) + per_page = int(request.GET.get("per_page", PER_PAGE)) + offset = int(request.GET.get("offset", 0)) + revs_ordering = request.GET.get("revs_ordering", "committer_date") + session_key = "rev_%s_log_ordering_%s" % (revision_id, revs_ordering) rev_log_session = request.session.get(session_key, None) rev_log = [] revs_walker_state = None if rev_log_session: - rev_log = rev_log_session['rev_log'] - revs_walker_state = rev_log_session['revs_walker_state'] + rev_log = rev_log_session["rev_log"] + revs_walker_state = rev_log_session["revs_walker_state"] - if len(rev_log) < offset+per_page: + if len(rev_log) < offset + per_page: revs_walker = service.get_revisions_walker( - revs_ordering, revision_id, - max_revs=offset+per_page+1, - state=revs_walker_state) - rev_log += [rev['id'] for rev in revs_walker] + revs_ordering, + revision_id, + max_revs=offset + per_page + 1, + state=revs_walker_state, + ) + rev_log += [rev["id"] for rev in revs_walker] revs_walker_state = revs_walker.export_state() - revs = rev_log[offset:offset+per_page] + revs = rev_log[offset : offset + per_page] revision_log = service.lookup_revision_multiple(revs) request.session[session_key] = { - 'rev_log': rev_log, - 'revs_walker_state': revs_walker_state + "rev_log": rev_log, + "revs_walker_state": revs_walker_state, } except Exception as exc: return handle_view_exception(request, exc) - swh_type = snapshot_context['swh_type'] - origin_info = snapshot_context['origin_info'] - visit_info = snapshot_context['visit_info'] - url_args = snapshot_context['url_args'] - query_params = snapshot_context['query_params'] - snapshot_id = snapshot_context['snapshot_id'] + swh_type = snapshot_context["swh_type"] + origin_info = snapshot_context["origin_info"] + visit_info = snapshot_context["visit_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] + snapshot_id = snapshot_context["snapshot_id"] - query_params['per_page'] = per_page - revs_ordering = request.GET.get('revs_ordering', '') - query_params['revs_ordering'] = revs_ordering + query_params["per_page"] = per_page + revs_ordering = request.GET.get("revs_ordering", "") + query_params["revs_ordering"] = revs_ordering - browse_view_name = 'browse-' + swh_type + '-log' + browse_view_name = "browse-" + swh_type + "-log" prev_log_url = None if len(rev_log) > offset + per_page: - query_params['offset'] = offset + per_page - prev_log_url = reverse(browse_view_name, - url_args=url_args, - query_params=query_params) + query_params["offset"] = offset + per_page + prev_log_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params + ) next_log_url = None if offset != 0: - query_params['offset'] = offset - per_page - next_log_url = reverse(browse_view_name, - url_args=url_args, - query_params=query_params) + query_params["offset"] = offset - per_page + next_log_url = 
reverse( + browse_view_name, url_args=url_args, query_params=query_params + ) - revision_log_data = format_log_entries(revision_log, per_page, - snapshot_context) + revision_log_data = format_log_entries(revision_log, per_page, snapshot_context) browse_rev_link = gen_revision_link(revision_id) @@ -696,79 +783,86 @@ browse_snp_link = gen_snapshot_link(snapshot_id) revision_metadata = { - 'context-independent revision': browse_rev_link, - 'context-independent revision history': browse_log_link, - 'context-independent snapshot': browse_snp_link, - 'snapshot': snapshot_id + "context-independent revision": browse_rev_link, + "context-independent revision history": browse_log_link, + "context-independent snapshot": browse_snp_link, + "snapshot": snapshot_id, } if origin_info: - revision_metadata['origin url'] = origin_info['url'] - revision_metadata['origin visit date'] = format_utc_iso_date( - visit_info['date']) - revision_metadata['origin visit type'] = visit_info['type'] + revision_metadata["origin url"] = origin_info["url"] + revision_metadata["origin visit date"] = format_utc_iso_date(visit_info["date"]) + revision_metadata["origin visit type"] = visit_info["type"] - swh_objects = [{'type': 'revision', - 'id': revision_id}, - {'type': 'snapshot', - 'id': snapshot_id}] + swh_objects = [ + {"type": "revision", "id": revision_id}, + {"type": "snapshot", "id": snapshot_id}, + ] - release_id = snapshot_context['release_id'] + release_id = snapshot_context["release_id"] if release_id: - swh_objects.append({'type': 'release', - 'id': release_id}) + swh_objects.append({"type": "release", "id": release_id}) browse_rel_link = gen_release_link(release_id) - revision_metadata['release'] = release_id - revision_metadata['context-independent release'] = browse_rel_link + revision_metadata["release"] = release_id + revision_metadata["context-independent release"] = browse_rel_link swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) - context_found = 'snapshot: %s' % snapshot_context['snapshot_id'] + context_found = "snapshot: %s" % snapshot_context["snapshot_id"] if origin_info: - context_found = 'origin: %s' % origin_info['url'] - heading = ('Revision history - %s - %s' % - (snapshot_context['branch'], context_found)) - - return render(request, 'browse/revision-log.html', - {'heading': heading, - 'swh_object_name': 'Revisions history', - 'swh_object_metadata': revision_metadata, - 'revision_log': revision_log_data, - 'revs_ordering': revs_ordering, - 'next_log_url': next_log_url, - 'prev_log_url': prev_log_url, - 'breadcrumbs': None, - 'top_right_link': None, - 'snapshot_context': snapshot_context, - 'vault_cooking': None, - 'show_actions_menu': True, - 'swh_ids': swh_ids}) - - -def browse_snapshot_branches(request, snapshot_id=None, - origin_url=None, timestamp=None): + context_found = "origin: %s" % origin_info["url"] + heading = "Revision history - %s - %s" % (snapshot_context["branch"], context_found) + + return render( + request, + "browse/revision-log.html", + { + "heading": heading, + "swh_object_name": "Revisions history", + "swh_object_metadata": revision_metadata, + "revision_log": revision_log_data, + "revs_ordering": revs_ordering, + "next_log_url": next_log_url, + "prev_log_url": prev_log_url, + "breadcrumbs": None, + "top_right_link": None, + "snapshot_context": snapshot_context, + "vault_cooking": None, + "show_actions_menu": True, + "swh_ids": swh_ids, + }, + ) + + +def browse_snapshot_branches( + request, snapshot_id=None, origin_url=None, timestamp=None +): """ Django view 
implementation for browsing a list of branches in a snapshot context. """ try: - snapshot_context = _process_snapshot_request(request, snapshot_id, - origin_url, timestamp) + snapshot_context = _process_snapshot_request( + request, snapshot_id, origin_url, timestamp + ) - branches_bc = request.GET.get('branches_breadcrumbs', '') - branches_bc = branches_bc.split(',') if branches_bc else [] - branches_from = branches_bc[-1] if branches_bc else '' + branches_bc = request.GET.get("branches_breadcrumbs", "") + branches_bc = branches_bc.split(",") if branches_bc else [] + branches_from = branches_bc[-1] if branches_bc else "" - swh_type = snapshot_context['swh_type'] - origin_info = snapshot_context['origin_info'] - url_args = snapshot_context['url_args'] - query_params = snapshot_context['query_params'] + swh_type = snapshot_context["swh_type"] + origin_info = snapshot_context["origin_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] - browse_view_name = 'browse-' + swh_type + '-directory' + browse_view_name = "browse-" + swh_type + "-directory" - snapshot = service.lookup_snapshot(snapshot_context['snapshot_id'], - branches_from, PER_PAGE+1, - target_types=['revision', 'alias']) + snapshot = service.lookup_snapshot( + snapshot_context["snapshot_id"], + branches_from, + PER_PAGE + 1, + target_types=["revision", "alias"], + ) displayed_branches, _ = process_snapshot_branches(snapshot) @@ -777,22 +871,26 @@ for branch in displayed_branches: if snapshot_id: - revision_url = reverse('browse-revision', - url_args={'sha1_git': branch['revision']}, - query_params={'snapshot_id': snapshot_id}) + revision_url = reverse( + "browse-revision", + url_args={"sha1_git": branch["revision"]}, + query_params={"snapshot_id": snapshot_id}, + ) else: - revision_url = reverse('browse-revision', - url_args={'sha1_git': branch['revision']}, - query_params={'origin': origin_info['url']}) - query_params['branch'] = branch['name'] - directory_url = reverse(browse_view_name, - url_args=url_args, - query_params=query_params) - del query_params['branch'] - branch['revision_url'] = revision_url - branch['directory_url'] = directory_url - - browse_view_name = 'browse-' + swh_type + '-branches' + revision_url = reverse( + "browse-revision", + url_args={"sha1_git": branch["revision"]}, + query_params={"origin": origin_info["url"]}, + ) + query_params["branch"] = branch["name"] + directory_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params + ) + del query_params["branch"] + branch["revision_url"] = revision_url + branch["directory_url"] = directory_url + + browse_view_name = "browse-" + swh_type + "-branches" prev_branches_url = None next_branches_url = None @@ -800,61 +898,74 @@ if branches_bc: query_params_prev = dict(query_params) - query_params_prev['branches_breadcrumbs'] = ','.join(branches_bc[:-1]) - prev_branches_url = reverse(browse_view_name, url_args=url_args, - query_params=query_params_prev) + query_params_prev["branches_breadcrumbs"] = ",".join(branches_bc[:-1]) + prev_branches_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params_prev + ) elif branches_from: - prev_branches_url = reverse(browse_view_name, url_args=url_args, - query_params=query_params) + prev_branches_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params + ) - if snapshot['next_branch'] is not None: + if snapshot["next_branch"] is not None: query_params_next = dict(query_params) - next_branch = 
displayed_branches[-1]['name'] + next_branch = displayed_branches[-1]["name"] del displayed_branches[-1] branches_bc.append(next_branch) - query_params_next['branches_breadcrumbs'] = ','.join(branches_bc) - next_branches_url = reverse(browse_view_name, url_args=url_args, - query_params=query_params_next) + query_params_next["branches_breadcrumbs"] = ",".join(branches_bc) + next_branches_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params_next + ) - heading = 'Branches - ' + heading = "Branches - " if origin_info: - heading += 'origin: %s' % origin_info['url'] + heading += "origin: %s" % origin_info["url"] else: - heading += 'snapshot: %s' % snapshot_id - - return render(request, 'browse/branches.html', - {'heading': heading, - 'swh_object_name': 'Branches', - 'swh_object_metadata': {}, - 'top_right_link': None, - 'displayed_branches': displayed_branches, - 'prev_branches_url': prev_branches_url, - 'next_branches_url': next_branches_url, - 'snapshot_context': snapshot_context}) - - -def browse_snapshot_releases(request, snapshot_id=None, - origin_url=None, timestamp=None): + heading += "snapshot: %s" % snapshot_id + + return render( + request, + "browse/branches.html", + { + "heading": heading, + "swh_object_name": "Branches", + "swh_object_metadata": {}, + "top_right_link": None, + "displayed_branches": displayed_branches, + "prev_branches_url": prev_branches_url, + "next_branches_url": next_branches_url, + "snapshot_context": snapshot_context, + }, + ) + + +def browse_snapshot_releases( + request, snapshot_id=None, origin_url=None, timestamp=None +): """ Django view implementation for browsing a list of releases in a snapshot context. """ try: - snapshot_context = _process_snapshot_request(request, snapshot_id, - origin_url, timestamp) + snapshot_context = _process_snapshot_request( + request, snapshot_id, origin_url, timestamp + ) - rel_bc = request.GET.get('releases_breadcrumbs', '') - rel_bc = rel_bc.split(',') if rel_bc else [] - rel_from = rel_bc[-1] if rel_bc else '' + rel_bc = request.GET.get("releases_breadcrumbs", "") + rel_bc = rel_bc.split(",") if rel_bc else [] + rel_from = rel_bc[-1] if rel_bc else "" - swh_type = snapshot_context['swh_type'] - origin_info = snapshot_context['origin_info'] - url_args = snapshot_context['url_args'] - query_params = snapshot_context['query_params'] + swh_type = snapshot_context["swh_type"] + origin_info = snapshot_context["origin_info"] + url_args = snapshot_context["url_args"] + query_params = snapshot_context["query_params"] - snapshot = service.lookup_snapshot(snapshot_context['snapshot_id'], - rel_from, PER_PAGE+1, - target_types=['release', 'alias']) + snapshot = service.lookup_snapshot( + snapshot_context["snapshot_id"], + rel_from, + PER_PAGE + 1, + target_types=["release", "alias"], + ) _, displayed_releases = process_snapshot_branches(snapshot) @@ -863,35 +974,45 @@ for release in displayed_releases: if snapshot_id: - query_params_tgt = {'snapshot_id': snapshot_id} + query_params_tgt = {"snapshot_id": snapshot_id} else: - query_params_tgt = {'origin': origin_info['url']} - release_url = reverse('browse-release', - url_args={'sha1_git': release['id']}, - query_params=query_params_tgt) - - target_url = '' - if release['target_type'] == 'revision': - target_url = reverse('browse-revision', - url_args={'sha1_git': release['target']}, - query_params=query_params_tgt) - elif release['target_type'] == 'directory': - target_url = reverse('browse-directory', - url_args={'sha1_git': release['target']}, - 
query_params=query_params_tgt) - elif release['target_type'] == 'content': - target_url = reverse('browse-content', - url_args={'query_string': release['target']}, - query_params=query_params_tgt) - elif release['target_type'] == 'release': - target_url = reverse('browse-release', - url_args={'sha1_git': release['target']}, - query_params=query_params_tgt) - - release['release_url'] = release_url - release['target_url'] = target_url - - browse_view_name = 'browse-' + swh_type + '-releases' + query_params_tgt = {"origin": origin_info["url"]} + release_url = reverse( + "browse-release", + url_args={"sha1_git": release["id"]}, + query_params=query_params_tgt, + ) + + target_url = "" + if release["target_type"] == "revision": + target_url = reverse( + "browse-revision", + url_args={"sha1_git": release["target"]}, + query_params=query_params_tgt, + ) + elif release["target_type"] == "directory": + target_url = reverse( + "browse-directory", + url_args={"sha1_git": release["target"]}, + query_params=query_params_tgt, + ) + elif release["target_type"] == "content": + target_url = reverse( + "browse-content", + url_args={"query_string": release["target"]}, + query_params=query_params_tgt, + ) + elif release["target_type"] == "release": + target_url = reverse( + "browse-release", + url_args={"sha1_git": release["target"]}, + query_params=query_params_tgt, + ) + + release["release_url"] = release_url + release["target_url"] = target_url + + browse_view_name = "browse-" + swh_type + "-releases" prev_releases_url = None next_releases_url = None @@ -899,38 +1020,46 @@ if rel_bc: query_params_prev = dict(query_params) - query_params_prev['releases_breadcrumbs'] = ','.join(rel_bc[:-1]) - prev_releases_url = reverse(browse_view_name, url_args=url_args, - query_params=query_params_prev) + query_params_prev["releases_breadcrumbs"] = ",".join(rel_bc[:-1]) + prev_releases_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params_prev + ) elif rel_from: - prev_releases_url = reverse(browse_view_name, url_args=url_args, - query_params=query_params) + prev_releases_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params + ) - if snapshot['next_branch'] is not None: + if snapshot["next_branch"] is not None: query_params_next = dict(query_params) - next_rel = displayed_releases[-1]['branch_name'] + next_rel = displayed_releases[-1]["branch_name"] del displayed_releases[-1] rel_bc.append(next_rel) - query_params_next['releases_breadcrumbs'] = ','.join(rel_bc) - next_releases_url = reverse(browse_view_name, url_args=url_args, - query_params=query_params_next) + query_params_next["releases_breadcrumbs"] = ",".join(rel_bc) + next_releases_url = reverse( + browse_view_name, url_args=url_args, query_params=query_params_next + ) - heading = 'Releases - ' + heading = "Releases - " if origin_info: - heading += 'origin: %s' % origin_info['url'] + heading += "origin: %s" % origin_info["url"] else: - heading += 'snapshot: %s' % snapshot_id - - return render(request, 'browse/releases.html', - {'heading': heading, - 'top_panel_visible': False, - 'top_panel_collapsible': False, - 'swh_object_name': 'Releases', - 'swh_object_metadata': {}, - 'top_right_link': None, - 'displayed_releases': displayed_releases, - 'prev_releases_url': prev_releases_url, - 'next_releases_url': next_releases_url, - 'snapshot_context': snapshot_context, - 'vault_cooking': None, - 'show_actions_menu': False}) + heading += "snapshot: %s" % snapshot_id + + return render( + request, + "browse/releases.html", + { 
+ "heading": heading, + "top_panel_visible": False, + "top_panel_collapsible": False, + "swh_object_name": "Releases", + "swh_object_metadata": {}, + "top_right_link": None, + "displayed_releases": displayed_releases, + "prev_releases_url": prev_releases_url, + "next_releases_url": next_releases_url, + "snapshot_context": snapshot_context, + "vault_cooking": None, + "show_actions_menu": False, + }, + ) diff --git a/swh/web/common/__init__.py b/swh/web/common/__init__.py --- a/swh/web/common/__init__.py +++ b/swh/web/common/__init__.py @@ -3,4 +3,4 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -default_app_config = 'swh.web.common.apps.SwhWebCommonConfig' +default_app_config = "swh.web.common.apps.SwhWebCommonConfig" diff --git a/swh/web/common/apps.py b/swh/web/common/apps.py --- a/swh/web/common/apps.py +++ b/swh/web/common/apps.py @@ -7,5 +7,5 @@ class SwhWebCommonConfig(AppConfig): - name = 'swh.web.common' - label = 'swh.web.common' + name = "swh.web.common" + label = "swh.web.common" diff --git a/swh/web/common/converters.py b/swh/web/common/converters.py --- a/swh/web/common/converters.py +++ b/swh/web/common/converters.py @@ -21,7 +21,7 @@ checksums[hash] = data[hash] del data[hash] if len(checksums) > 0: - data['checksums'] = checksums + data["checksums"] = checksums def fmap(f, data): @@ -53,9 +53,18 @@ return f(data) -def from_swh(dict_swh, hashess={}, bytess={}, dates={}, blacklist={}, - removables_if_empty={}, empty_dict={}, empty_list={}, - convert={}, convert_fn=lambda x: x): +def from_swh( + dict_swh, + hashess={}, + bytess={}, + dates={}, + blacklist={}, + removables_if_empty={}, + empty_dict={}, + empty_list={}, + convert={}, + convert_fn=lambda x: x, +): """Convert from a swh dictionary to something reasonably json serializable. @@ -78,6 +87,7 @@ dictionary equivalent as dict_swh only with its keys converted. """ + def convert_hashes_bytes(v): """v is supposedly a hash as bytes, returns it converted in hex. 
@@ -94,7 +104,7 @@ """ if isinstance(v, bytes): - return v.decode('utf-8') + return v.decode("utf-8") return v def convert_date(v): @@ -118,20 +128,18 @@ if isinstance(v, datetime.datetime): return v.isoformat() - tz = datetime.timezone(datetime.timedelta(minutes=v['offset'])) - swh_timestamp = v['timestamp'] + tz = datetime.timezone(datetime.timedelta(minutes=v["offset"])) + swh_timestamp = v["timestamp"] if isinstance(swh_timestamp, dict): - date = datetime.datetime.fromtimestamp( - swh_timestamp['seconds'], tz=tz) + date = datetime.datetime.fromtimestamp(swh_timestamp["seconds"], tz=tz) else: - date = datetime.datetime.fromtimestamp( - swh_timestamp, tz=tz) + date = datetime.datetime.fromtimestamp(swh_timestamp, tz=tz) datestr = date.isoformat() - if v['offset'] == 0 and v['negative_utc']: + if v["offset"] == 0 and v["negative_utc"]: # remove the rightmost + and replace it with a - - return '-'.join(datestr.rsplit('+', 1)) + return "-".join(datestr.rsplit("+", 1)) return datestr @@ -148,24 +156,28 @@ elif key in convert: new_dict[key] = convert_fn(value) elif isinstance(value, dict): - new_dict[key] = from_swh(value, - hashess=hashess, bytess=bytess, - dates=dates, blacklist=blacklist, - removables_if_empty=removables_if_empty, - empty_dict=empty_dict, - empty_list=empty_list, - convert=convert, - convert_fn=convert_fn) + new_dict[key] = from_swh( + value, + hashess=hashess, + bytess=bytess, + dates=dates, + blacklist=blacklist, + removables_if_empty=removables_if_empty, + empty_dict=empty_dict, + empty_list=empty_list, + convert=convert, + convert_fn=convert_fn, + ) elif key in hashess: new_dict[key] = fmap(convert_hashes_bytes, value) elif key in bytess: try: new_dict[key] = fmap(convert_bytes, value) except UnicodeDecodeError: - if 'decoding_failures' not in new_dict: - new_dict['decoding_failures'] = [key] + if "decoding_failures" not in new_dict: + new_dict["decoding_failures"] = [key] else: - new_dict['decoding_failures'].append(key) + new_dict["decoding_failures"].append(key) new_dict[key] = fmap(decode_with_escape, value) elif key in empty_dict and not value: new_dict[key] = {} @@ -214,9 +226,9 @@ """ return from_swh( release, - hashess={'id', 'target'}, - bytess={'message', 'name', 'fullname', 'email'}, - dates={'date'}, + hashess={"id", "target"}, + bytess={"message", "name", "fullname", "email"}, + dates={"date"}, ) @@ -225,10 +237,11 @@ encoded value. 
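The hunk below shows a default() hook that decodes bytes and falls back to repr(); such an encoder plugs into the standard json machinery through the cls argument of json.dumps. A self-contained sketch of the pattern (the encoder's real class name is not visible in this hunk, so the name below is hypothetical):

    import json

    class BytesTolerantEncoder(json.JSONEncoder):  # hypothetical name
        def default(self, obj):
            if isinstance(obj, bytes):
                try:
                    return obj.decode("utf-8")
                except UnicodeDecodeError:
                    # fall back to a printable representation instead of failing
                    return repr(obj)
            return super().default(obj)

    json.dumps({"ok": b"text", "raw": b"\xff\xfe"}, cls=BytesTolerantEncoder)
    # the first value decodes cleanly; the second serializes as its repr()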
""" + def default(self, obj): if isinstance(obj, bytes): try: - return obj.decode('utf-8') + return obj.decode("utf-8") except UnicodeDecodeError: # fallback to binary representation to avoid display errors return repr(obj) @@ -277,22 +290,24 @@ Remaining keys are left as is """ - revision = from_swh(revision, - hashess={'id', 'directory', 'parents', 'children'}, - bytess={'name', 'fullname', 'email'}, - convert={'metadata'}, - convert_fn=convert_revision_metadata, - dates={'date', 'committer_date'}) + revision = from_swh( + revision, + hashess={"id", "directory", "parents", "children"}, + bytess={"name", "fullname", "email"}, + convert={"metadata"}, + convert_fn=convert_revision_metadata, + dates={"date", "committer_date"}, + ) if revision: - if 'parents' in revision: - revision['merge'] = len(revision['parents']) > 1 - if 'message' in revision: + if "parents" in revision: + revision["merge"] = len(revision["parents"]) > 1 + if "message" in revision: try: - revision['message'] = revision['message'].decode('utf-8') + revision["message"] = revision["message"].decode("utf-8") except UnicodeDecodeError: - revision['message_decoding_failed'] = True - revision['message'] = None + revision["message_decoding_failed"] = True + revision["message"] = None return revision @@ -301,30 +316,33 @@ """Convert swh content to serializable content dictionary. """ - return from_swh(content, - hashess={'sha1', 'sha1_git', 'sha256', 'blake2s256'}, - blacklist={'ctime'}, - convert={'status'}, - convert_fn=lambda v: 'absent' if v == 'hidden' else v) + return from_swh( + content, + hashess={"sha1", "sha1_git", "sha256", "blake2s256"}, + blacklist={"ctime"}, + convert={"status"}, + convert_fn=lambda v: "absent" if v == "hidden" else v, + ) def from_person(person): """Convert swh person to serializable person dictionary. """ - return from_swh(person, - bytess={'name', 'fullname', 'email'}) + return from_swh(person, bytess={"name", "fullname", "email"}) def from_origin_visit(visit): """Convert swh origin_visit to serializable origin_visit dictionary. """ - ov = from_swh(visit, - hashess={'target', 'snapshot'}, - bytess={'branch'}, - dates={'date'}, - empty_dict={'metadata'}) + ov = from_swh( + visit, + hashess={"target", "snapshot"}, + bytess={"branch"}, + dates={"date"}, + empty_dict={"metadata"}, + ) return ov @@ -333,21 +351,16 @@ """Convert swh snapshot to serializable snapshot dictionary. """ - sv = from_swh(snapshot, - hashess={'id', 'target'}, - bytess={'next_branch'}) - - if sv and 'branches' in sv: - sv['branches'] = { - decode_with_escape(k): v - for k, v in sv['branches'].items() - } - for k, v in snapshot['branches'].items(): + sv = from_swh(snapshot, hashess={"id", "target"}, bytess={"next_branch"}) + + if sv and "branches" in sv: + sv["branches"] = {decode_with_escape(k): v for k, v in sv["branches"].items()} + for k, v in snapshot["branches"].items(): # alias target existing branch names, not a sha1 - if v and v['target_type'] == 'alias': + if v and v["target_type"] == "alias": branch = decode_with_escape(k) - target = decode_with_escape(v['target']) - sv['branches'][branch]['target'] = target + target = decode_with_escape(v["target"]) + sv["branches"][branch]["target"] = target return sv @@ -356,14 +369,14 @@ """Convert swh directory to serializable directory dictionary. 
""" - return from_swh(dir_entry, - hashess={'dir_id', 'sha1_git', 'sha1', 'sha256', - 'blake2s256', 'target'}, - bytess={'name'}, - removables_if_empty={ - 'sha1', 'sha1_git', 'sha256', 'blake2s256', 'status'}, - convert={'status'}, - convert_fn=lambda v: 'absent' if v == 'hidden' else v) + return from_swh( + dir_entry, + hashess={"dir_id", "sha1_git", "sha1", "sha256", "blake2s256", "target"}, + bytess={"name"}, + removables_if_empty={"sha1", "sha1_git", "sha256", "blake2s256", "status"}, + convert={"status"}, + convert_fn=lambda v: "absent" if v == "hidden" else v, + ) def from_filetype(content_entry): @@ -371,5 +384,4 @@ 'id', 'encoding', and 'mimetype'. """ - return from_swh(content_entry, - hashess={'id'}) + return from_swh(content_entry, hashess={"id"}) diff --git a/swh/web/common/exc.py b/swh/web/common/exc.py --- a/swh/web/common/exc.py +++ b/swh/web/common/exc.py @@ -20,6 +20,7 @@ Example: Asking a content with the wrong identifier format. """ + pass @@ -30,6 +31,7 @@ that content does not exist. """ + pass @@ -41,6 +43,7 @@ is not text. """ + pass @@ -50,37 +53,45 @@ Example: Asking to resolve 10000 persistent identifier when the limit is 1000. """ + pass http_status_code_message = { - 400: 'Bad Request', - 401: 'Unauthorized', - 403: 'Access Denied', - 404: 'Resource not found', - 413: 'Payload Too Large', - 500: 'Internal Server Error', - 501: 'Not Implemented', - 502: 'Bad Gateway', - 503: 'Service unavailable' + 400: "Bad Request", + 401: "Unauthorized", + 403: "Access Denied", + 404: "Resource not found", + 413: "Payload Too Large", + 500: "Internal Server Error", + 501: "Not Implemented", + 502: "Bad Gateway", + 503: "Service unavailable", } def _generate_error_page(request, error_code, error_description): - return render(request, 'error.html', - {'error_code': error_code, - 'error_message': http_status_code_message[error_code], - 'error_description': mark_safe(error_description)}, - status=error_code) + return render( + request, + "error.html", + { + "error_code": error_code, + "error_message": http_status_code_message[error_code], + "error_description": mark_safe(error_description), + }, + status=error_code, + ) def swh_handle400(request, exception=None): """ Custom Django HTTP error 400 handler for swh-web. """ - error_description = ('The server cannot process the request to %s due to ' - 'something that is perceived to be a client error.' % - escape(request.META['PATH_INFO'])) + error_description = ( + "The server cannot process the request to %s due to " + "something that is perceived to be a client error." + % escape(request.META["PATH_INFO"]) + ) return _generate_error_page(request, 400, error_description) @@ -88,8 +99,9 @@ """ Custom Django HTTP error 403 handler for swh-web. """ - error_description = ('The resource %s requires an authentication.' % - escape(request.META['PATH_INFO'])) + error_description = "The resource %s requires an authentication." % escape( + request.META["PATH_INFO"] + ) return _generate_error_page(request, 403, error_description) @@ -97,8 +109,9 @@ """ Custom Django HTTP error 404 handler for swh-web. """ - error_description = ('The resource %s could not be found on the server.' % - escape(request.META['PATH_INFO'])) + error_description = "The resource %s could not be found on the server." % escape( + request.META["PATH_INFO"] + ) return _generate_error_page(request, 404, error_description) @@ -106,9 +119,10 @@ """ Custom Django HTTP error 500 handler for swh-web. 
""" - error_description = ('An unexpected condition was encountered when ' - 'requesting resource %s.' % - escape(request.META['PATH_INFO'])) + error_description = ( + "An unexpected condition was encountered when " + "requesting resource %s." % escape(request.META["PATH_INFO"]) + ) return _generate_error_page(request, 500, error_description) @@ -119,8 +133,8 @@ """ sentry_sdk.capture_exception(exc) error_code = 500 - error_description = '%s: %s' % (type(exc).__name__, str(exc)) - if get_config()['debug']: + error_description = "%s: %s" % (type(exc).__name__, str(exc)) + if get_config()["debug"]: error_description = traceback.format_exc() if isinstance(exc, BadInputExc): error_code = 400 @@ -131,5 +145,6 @@ if html_response: return _generate_error_page(request, error_code, error_description) else: - return HttpResponse(error_description, content_type='text/plain', - status=error_code) + return HttpResponse( + error_description, content_type="text/plain", status=error_code + ) diff --git a/swh/web/common/highlightjs.py b/swh/web/common/highlightjs.py --- a/swh/web/common/highlightjs.py +++ b/swh/web/common/highlightjs.py @@ -6,238 +6,396 @@ import functools from typing import Dict -from pygments.lexers import ( - get_all_lexers, - get_lexer_for_filename -) +from pygments.lexers import get_all_lexers, get_lexer_for_filename import sentry_sdk # set of languages ids that can be highlighted # by highlight.js library -_hljs_languages = set([ - '1c', 'abnf', 'accesslog', 'actionscript', 'ada', 'angelscript', 'apache', - 'applescript', 'arcade', 'arduino', 'armasm', 'asciidoc', 'aspectj', - 'autohotkey', 'autoit', 'avrasm', 'awk', 'axapta', 'bash', 'basic', 'bnf', - 'brainfuck', 'cal', 'capnproto', 'ceylon', 'clean', 'clojure', - 'clojure-repl', 'cmake', 'coffeescript', 'coq', 'cos', 'cpp', 'crmsh', - 'crystal', 'cs', 'csp', 'css', 'd', 'dart', 'delphi', 'diff', 'django', - 'dns', 'dockerfile', 'dos', 'dsconfig', 'dts', 'dust', 'ebnf', 'elixir', - 'elm', 'erb', 'erlang', 'erlang-repl', 'excel', 'fix', 'flix', 'fortran', - 'fsharp', 'gams', 'gauss', 'gcode', 'gherkin', 'glsl', 'gml', 'go', - 'golo', 'gradle', 'groovy', 'haml', 'handlebars', 'haskell', 'haxe', - 'hsp', 'htmlbars', 'http', 'hy', 'inform7', 'ini', 'irpf90', 'isbl', - 'java', 'javascript', 'jboss-cli', 'json', 'julia', 'julia-repl', - 'kotlin', 'lasso', 'ldif', 'leaf', 'less', 'lisp', 'livecodeserver', - 'livescript', 'llvm', 'lsl', 'lua', 'makefile', 'markdown', 'mathematica', - 'matlab', 'maxima', 'mel', 'mercury', 'mipsasm', 'mizar', 'mojolicious', - 'monkey', 'moonscript', 'n1ql', 'nginx', 'nimrod', 'nix', 'nsis', - 'objectivec', 'ocaml', 'openscad', 'oxygene', 'parser3', 'perl', 'pf', - 'pgsql', 'php', 'plaintext', 'pony', 'powershell', 'processing', - 'profile', 'prolog', 'properties', 'protobuf', 'puppet', 'purebasic', - 'python', 'q', 'qml', 'r', 'reasonml', 'rib', 'roboconf', 'routeros', - 'rsl', 'ruby', 'ruleslanguage', 'rust', 'sas', 'scala', 'scheme', - 'scilab', 'scss', 'shell', 'smali', 'smalltalk', 'sml', 'sqf', 'sql', - 'stan', 'stata', 'step21', 'stylus', 'subunit', 'swift', 'taggerscript', - 'tap', 'tcl', 'tex', 'thrift', 'tp', 'twig', 'typescript', 'vala', - 'vbnet', 'vbscript', 'vbscript-html', 'verilog', 'vhdl', 'vim', 'x86asm', - 'xl', 'xml', 'xquery', 'yaml', 'zephir' -]) +_hljs_languages = set( + [ + "1c", + "abnf", + "accesslog", + "actionscript", + "ada", + "angelscript", + "apache", + "applescript", + "arcade", + "arduino", + "armasm", + "asciidoc", + "aspectj", + "autohotkey", + "autoit", + "avrasm", + 
"awk", + "axapta", + "bash", + "basic", + "bnf", + "brainfuck", + "cal", + "capnproto", + "ceylon", + "clean", + "clojure", + "clojure-repl", + "cmake", + "coffeescript", + "coq", + "cos", + "cpp", + "crmsh", + "crystal", + "cs", + "csp", + "css", + "d", + "dart", + "delphi", + "diff", + "django", + "dns", + "dockerfile", + "dos", + "dsconfig", + "dts", + "dust", + "ebnf", + "elixir", + "elm", + "erb", + "erlang", + "erlang-repl", + "excel", + "fix", + "flix", + "fortran", + "fsharp", + "gams", + "gauss", + "gcode", + "gherkin", + "glsl", + "gml", + "go", + "golo", + "gradle", + "groovy", + "haml", + "handlebars", + "haskell", + "haxe", + "hsp", + "htmlbars", + "http", + "hy", + "inform7", + "ini", + "irpf90", + "isbl", + "java", + "javascript", + "jboss-cli", + "json", + "julia", + "julia-repl", + "kotlin", + "lasso", + "ldif", + "leaf", + "less", + "lisp", + "livecodeserver", + "livescript", + "llvm", + "lsl", + "lua", + "makefile", + "markdown", + "mathematica", + "matlab", + "maxima", + "mel", + "mercury", + "mipsasm", + "mizar", + "mojolicious", + "monkey", + "moonscript", + "n1ql", + "nginx", + "nimrod", + "nix", + "nsis", + "objectivec", + "ocaml", + "openscad", + "oxygene", + "parser3", + "perl", + "pf", + "pgsql", + "php", + "plaintext", + "pony", + "powershell", + "processing", + "profile", + "prolog", + "properties", + "protobuf", + "puppet", + "purebasic", + "python", + "q", + "qml", + "r", + "reasonml", + "rib", + "roboconf", + "routeros", + "rsl", + "ruby", + "ruleslanguage", + "rust", + "sas", + "scala", + "scheme", + "scilab", + "scss", + "shell", + "smali", + "smalltalk", + "sml", + "sqf", + "sql", + "stan", + "stata", + "step21", + "stylus", + "subunit", + "swift", + "taggerscript", + "tap", + "tcl", + "tex", + "thrift", + "tp", + "twig", + "typescript", + "vala", + "vbnet", + "vbscript", + "vbscript-html", + "verilog", + "vhdl", + "vim", + "x86asm", + "xl", + "xml", + "xquery", + "yaml", + "zephir", + ] +) # languages aliases defined in highlight.js _hljs_languages_aliases = { - 'ado': 'stata', - 'adoc': 'asciidoc', - 'ahk': 'autohotkey', - 'aj': 'aspectj', - 'apacheconf': 'apache', - 'arm': 'armasm', - 'as': 'actionscript', - 'asc': 'asciidoc', - 'atom': 'xml', - 'bas': 'basic', - 'bat': 'dos', - 'bf': 'brainfuck', - 'bind': 'dns', - 'bsl': '1c', - 'c-al': 'cal', - 'c': 'cpp', - 'c++': 'cpp', - 'capnp': 'capnproto', - 'cc': 'cpp', - 'clj': 'clojure', - 'cls': 'cos', - 'cmake.in': 'cmake', - 'cmd': 'dos', - 'coffee': 'coffeescript', - 'console': 'shell', - 'cr': 'crystal', - 'craftcms': 'twig', - 'crm': 'crmsh', - 'csharp': 'cs', - 'cson': 'coffeescript', - 'dcl': 'clean', - 'dfm': 'delphi', - 'do': 'stata', - 'docker': 'dockerfile', - 'dpr': 'delphi', - 'dst': 'dust', - 'dtsi': 'dts', - 'ep': 'mojolicious', - 'erl': 'erlang', - 'ex': 'elixir', - 'exs': 'elixir', - 'f90': 'fortran', - 'f95': 'fortran', - 'feature': 'gherkin', - 'freepascal': 'delphi', - 'fs': 'fsharp', - 'fsx': 'fsharp', - 'gemspec': 'ruby', - 'GML': 'gml', - 'gms': 'gams', - 'golang': 'go', - 'graph': 'roboconf', - 'gss': 'gauss', - 'gyp': 'python', - 'h': 'cpp', - 'h++': 'cpp', - 'hbs': 'handlebars', - 'hpp': 'cpp', - 'hs': 'haskell', - 'html': 'xml', - 'html.handlebars': 'handlebars', - 'html.hbs': 'handlebars', - 'https': 'http', - 'hx': 'haxe', - 'hylang': 'hy', - 'i7': 'inform7', - 'i7x': 'inform7', - 'iced': 'coffeescript', - 'icl': 'clean', - 'ino': 'arduino', - 'instances': 'roboconf', - 'ipynb': 'json', - 'irb': 'ruby', - 'jinja': 'django', - 'js': 'javascript', - 'jsp': 'java', - 'jsx': 
'javascript', - 'k': 'q', - 'kdb': 'q', - 'kt': 'kotlin', - 'lassoscript': 'lasso', - 'lazarus': 'delphi', - 'lc': 'livecode', - 'lfm': 'delphi', - 'll': 'llvm', - 'lpr': 'delphi', - 'ls': 'livescript', - 'm': 'matlab', - 'mak': 'makefile', - 'md': 'markdown', - 'mikrotik': 'routeros', - 'mips': 'mipsasm', - 'mk': 'monkey', - 'mkd': 'markdown', - 'mkdown': 'markdown', - 'ml': 'ocaml', - 'mli': 'ocaml', - 'mm': 'objectivec', - 'mma': 'mathematica', - 'moo': 'mercury', - 'moon': 'moonscript', - 'nav': 'cal', - 'nb': 'mathematica', - 'nc': 'gcode', - 'nginxconf': 'nginx', - 'ni': 'inform7', - 'nim': 'nimrod', - 'nixos': 'nix', - 'nsi': 'nsis', - 'obj-c': 'objectivec', - 'objc': 'objectivec', - 'osascript': 'applescript', - 'osl': 'rsl', - 'p': 'parser3', - 'p21': 'step21', - 'pas': 'delphi', - 'pascal': 'delphi', - 'patch': 'diff', - 'pb': 'purebasic', - 'pbi': 'purebasic', - 'pcmk': 'crmsh', - 'pde': 'processing', - 'pf.conf': 'pf', - 'php3': 'php', - 'php4': 'php', - 'php5': 'php', - 'php6': 'php', - 'php7': 'php', - 'pl': 'perl', - 'plist': 'xml', - 'pm': 'perl', - 'podspec': 'ruby', - 'postgres': 'pgsql', - 'postgresql': 'pgsql', - 'pp': 'puppet', - 'proto': 'protobuf', - 'ps': 'powershell', - 'ps1': 'powershell', - 'psd1': 'powershell', - 'psm1': 'powershell', - 'py': 'python', - 'qt': 'qml', - 'rb': 'ruby', - 're': 'reasonml', - 'rei': 'reasonml', - 'rs': 'rust', - 'rsc': 'routeros', - 'rss': 'xml', - 'rst': 'nohighlight', - 's': 'armasm', - 'SAS': 'sas', - 'scad': 'openscad', - 'sci': 'scilab', - 'scm': 'scheme', - 'sh': 'bash', - 'sig': 'sml', - 'sl': 'rsl', - 'st': 'smalltalk', - 'step': 'step21', - 'stp': 'step21', - 'styl': 'stylus', - 'sv': 'verilog', - 'svh': 'verilog', - 'tao': 'xl', - 'thor': 'ruby', - 'tk': 'tcl', - 'toml': 'ini', - 'ts': 'typescript', - 'txt': 'nohighlight', - 'v': 'coq', - 'vb': 'vbnet', - 'vbs': 'vbscript', - 'vhd': 'vhdl', - 'wildfly-cli': 'jboss-cli', - 'wl': 'mathematica', - 'wls': 'mathematica', - 'xhtml': 'xml', - 'xjb': 'xml', - 'xls': 'excel', - 'xlsx': 'excel', - 'xpath': 'xquery', - 'xpo': 'axapta', - 'xpp': 'axapta', - 'xq': 'xquery', - 'xqy': 'xquery', - 'xsd': 'xml', - 'xsl': 'xml', - 'YAML': 'yaml', - 'yml': 'yaml', - 'zep': 'zephir', - 'zone': 'dns', - 'zsh': 'bash' + "ado": "stata", + "adoc": "asciidoc", + "ahk": "autohotkey", + "aj": "aspectj", + "apacheconf": "apache", + "arm": "armasm", + "as": "actionscript", + "asc": "asciidoc", + "atom": "xml", + "bas": "basic", + "bat": "dos", + "bf": "brainfuck", + "bind": "dns", + "bsl": "1c", + "c-al": "cal", + "c": "cpp", + "c++": "cpp", + "capnp": "capnproto", + "cc": "cpp", + "clj": "clojure", + "cls": "cos", + "cmake.in": "cmake", + "cmd": "dos", + "coffee": "coffeescript", + "console": "shell", + "cr": "crystal", + "craftcms": "twig", + "crm": "crmsh", + "csharp": "cs", + "cson": "coffeescript", + "dcl": "clean", + "dfm": "delphi", + "do": "stata", + "docker": "dockerfile", + "dpr": "delphi", + "dst": "dust", + "dtsi": "dts", + "ep": "mojolicious", + "erl": "erlang", + "ex": "elixir", + "exs": "elixir", + "f90": "fortran", + "f95": "fortran", + "feature": "gherkin", + "freepascal": "delphi", + "fs": "fsharp", + "fsx": "fsharp", + "gemspec": "ruby", + "GML": "gml", + "gms": "gams", + "golang": "go", + "graph": "roboconf", + "gss": "gauss", + "gyp": "python", + "h": "cpp", + "h++": "cpp", + "hbs": "handlebars", + "hpp": "cpp", + "hs": "haskell", + "html": "xml", + "html.handlebars": "handlebars", + "html.hbs": "handlebars", + "https": "http", + "hx": "haxe", + "hylang": "hy", + "i7": "inform7", + 
"i7x": "inform7", + "iced": "coffeescript", + "icl": "clean", + "ino": "arduino", + "instances": "roboconf", + "ipynb": "json", + "irb": "ruby", + "jinja": "django", + "js": "javascript", + "jsp": "java", + "jsx": "javascript", + "k": "q", + "kdb": "q", + "kt": "kotlin", + "lassoscript": "lasso", + "lazarus": "delphi", + "lc": "livecode", + "lfm": "delphi", + "ll": "llvm", + "lpr": "delphi", + "ls": "livescript", + "m": "matlab", + "mak": "makefile", + "md": "markdown", + "mikrotik": "routeros", + "mips": "mipsasm", + "mk": "monkey", + "mkd": "markdown", + "mkdown": "markdown", + "ml": "ocaml", + "mli": "ocaml", + "mm": "objectivec", + "mma": "mathematica", + "moo": "mercury", + "moon": "moonscript", + "nav": "cal", + "nb": "mathematica", + "nc": "gcode", + "nginxconf": "nginx", + "ni": "inform7", + "nim": "nimrod", + "nixos": "nix", + "nsi": "nsis", + "obj-c": "objectivec", + "objc": "objectivec", + "osascript": "applescript", + "osl": "rsl", + "p": "parser3", + "p21": "step21", + "pas": "delphi", + "pascal": "delphi", + "patch": "diff", + "pb": "purebasic", + "pbi": "purebasic", + "pcmk": "crmsh", + "pde": "processing", + "pf.conf": "pf", + "php3": "php", + "php4": "php", + "php5": "php", + "php6": "php", + "php7": "php", + "pl": "perl", + "plist": "xml", + "pm": "perl", + "podspec": "ruby", + "postgres": "pgsql", + "postgresql": "pgsql", + "pp": "puppet", + "proto": "protobuf", + "ps": "powershell", + "ps1": "powershell", + "psd1": "powershell", + "psm1": "powershell", + "py": "python", + "qt": "qml", + "rb": "ruby", + "re": "reasonml", + "rei": "reasonml", + "rs": "rust", + "rsc": "routeros", + "rss": "xml", + "rst": "nohighlight", + "s": "armasm", + "SAS": "sas", + "scad": "openscad", + "sci": "scilab", + "scm": "scheme", + "sh": "bash", + "sig": "sml", + "sl": "rsl", + "st": "smalltalk", + "step": "step21", + "stp": "step21", + "styl": "stylus", + "sv": "verilog", + "svh": "verilog", + "tao": "xl", + "thor": "ruby", + "tk": "tcl", + "toml": "ini", + "ts": "typescript", + "txt": "nohighlight", + "v": "coq", + "vb": "vbnet", + "vbs": "vbscript", + "vhd": "vhdl", + "wildfly-cli": "jboss-cli", + "wl": "mathematica", + "wls": "mathematica", + "xhtml": "xml", + "xjb": "xml", + "xls": "excel", + "xlsx": "excel", + "xpath": "xquery", + "xpo": "axapta", + "xpp": "axapta", + "xq": "xquery", + "xqy": "xquery", + "xsd": "xml", + "xsl": "xml", + "YAML": "yaml", + "yml": "yaml", + "zep": "zephir", + "zone": "dns", + "zsh": "bash", } # dictionary mapping pygment lexers to hljs languages @@ -246,24 +404,24 @@ # dictionary mapping mime types to hljs languages _mime_type_to_hljs_language = { - 'text/x-c': 'cpp', - 'text/x-c++': 'cpp', - 'text/x-msdos-batch': 'dos', - 'text/x-lisp': 'lisp', - 'text/x-shellscript': 'bash', + "text/x-c": "cpp", + "text/x-c++": "cpp", + "text/x-msdos-batch": "dos", + "text/x-lisp": "lisp", + "text/x-shellscript": "bash", } # dictionary mapping filenames to hljs languages _filename_to_hljs_language = { - 'cmakelists.txt': 'cmake', - '.htaccess': 'apache', - 'httpd.conf': 'apache', - 'access.log': 'accesslog', - 'nginx.log': 'accesslog', - 'resolv.conf': 'dns', - 'dockerfile': 'docker', - 'nginx.conf': 'nginx', - 'pf.conf': 'pf' + "cmakelists.txt": "cmake", + ".htaccess": "apache", + "httpd.conf": "apache", + "access.log": "accesslog", + "nginx.log": "accesslog", + "resolv.conf": "dns", + "dockerfile": "docker", + "nginx.conf": "nginx", + "pf.conf": "pf", } @@ -303,7 +461,7 @@ return _filename_to_hljs_language[filename_lower] if filename_lower in _hljs_languages: return 
filename_lower - exts = filename_lower.split('.') + exts = filename_lower.split(".") # check if file extension matches an hljs language # also handle .ext.in cases for ext in reversed(exts[-2:]): @@ -326,7 +484,7 @@ # otherwise, try to find a match between the file extensions # associated to the lexer and the hljs language aliases if lexer: - exts = [ext.replace('*.', '') for ext in lexer.filenames] + exts = [ext.replace("*.", "") for ext in lexer.filenames] for ext in exts: if ext in _hljs_languages_aliases: return _hljs_languages_aliases[ext] diff --git a/swh/web/common/identifiers.py b/swh/web/common/identifiers.py --- a/swh/web/common/identifiers.py +++ b/swh/web/common/identifiers.py @@ -11,9 +11,15 @@ from swh.model.exceptions import ValidationError from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import ( - persistent_identifier, parse_persistent_identifier, - CONTENT, DIRECTORY, ORIGIN, RELEASE, REVISION, SNAPSHOT, - PersistentId + persistent_identifier, + parse_persistent_identifier, + CONTENT, + DIRECTORY, + ORIGIN, + RELEASE, + REVISION, + SNAPSHOT, + PersistentId, ) from swh.web.common.exc import BadInputExc @@ -21,8 +27,9 @@ from swh.web.common.utils import reverse -def get_swh_persistent_id(object_type: str, object_id: str, - scheme_version: int = 1) -> str: +def get_swh_persistent_id( + object_type: str, object_id: str, scheme_version: int = 1 +) -> str: """ Returns the persistent identifier for a swh object based on: @@ -48,21 +55,21 @@ try: swh_id = persistent_identifier(object_type, object_id, scheme_version) except ValidationError as e: - raise BadInputExc('Invalid object (%s) for swh persistent id. %s' % - (object_id, e)) + raise BadInputExc( + "Invalid object (%s) for swh persistent id. %s" % (object_id, e) + ) else: return swh_id -ResolvedPersistentId = TypedDict('ResolvedPersistentId', { - 'swh_id_parsed': PersistentId, - 'browse_url': Optional[str] -}) +ResolvedPersistentId = TypedDict( + "ResolvedPersistentId", {"swh_id_parsed": PersistentId, "browse_url": Optional[str]} +) -def resolve_swh_persistent_id(swh_id: str, - query_params: Optional[QueryParameters] = None - ) -> ResolvedPersistentId: +def resolve_swh_persistent_id( + swh_id: str, query_params: Optional[QueryParameters] = None +) -> ResolvedPersistentId: """ Try to resolve a Software Heritage persistent id into an url for browsing the targeted object. 
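A usage sketch for the resolver (the identifier below is illustrative): the 'lines' qualifier handled in the next hunk becomes an #L fragment appended to the browse URL.

    from swh.web.common.identifiers import resolve_swh_persistent_id

    # hypothetical content identifier with a line-range qualifier
    pid = "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;lines=9-15"
    resolved = resolve_swh_persistent_id(pid)
    # resolved["swh_id_parsed"] is the parsed PersistentId;
    # resolved["browse_url"] targets the browse-content view and, per the
    # code below, should end with a "#L9-L15" fragment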
@@ -82,48 +89,58 @@ object_type = swh_id_parsed.object_type object_id = swh_id_parsed.object_id browse_url = None - query_dict = QueryDict('', mutable=True) + query_dict = QueryDict("", mutable=True) if query_params and len(query_params) > 0: for k in sorted(query_params.keys()): query_dict[k] = query_params[k] - if 'origin' in swh_id_parsed.metadata: - query_dict['origin'] = swh_id_parsed.metadata['origin'] + if "origin" in swh_id_parsed.metadata: + query_dict["origin"] = swh_id_parsed.metadata["origin"] if object_type == CONTENT: - query_string = 'sha1_git:' + object_id - fragment = '' - if 'lines' in swh_id_parsed.metadata: - lines = swh_id_parsed.metadata['lines'].split('-') - fragment += '#L' + lines[0] + query_string = "sha1_git:" + object_id + fragment = "" + if "lines" in swh_id_parsed.metadata: + lines = swh_id_parsed.metadata["lines"].split("-") + fragment += "#L" + lines[0] if len(lines) > 1: - fragment += '-L' + lines[1] - browse_url = reverse('browse-content', - url_args={'query_string': query_string}, - query_params=query_dict) + fragment + fragment += "-L" + lines[1] + browse_url = ( + reverse( + "browse-content", + url_args={"query_string": query_string}, + query_params=query_dict, + ) + + fragment + ) elif object_type == DIRECTORY: - browse_url = reverse('browse-directory', - url_args={'sha1_git': object_id}, - query_params=query_dict) + browse_url = reverse( + "browse-directory", + url_args={"sha1_git": object_id}, + query_params=query_dict, + ) elif object_type == RELEASE: - browse_url = reverse('browse-release', - url_args={'sha1_git': object_id}, - query_params=query_dict) + browse_url = reverse( + "browse-release", url_args={"sha1_git": object_id}, query_params=query_dict + ) elif object_type == REVISION: - browse_url = reverse('browse-revision', - url_args={'sha1_git': object_id}, - query_params=query_dict) + browse_url = reverse( + "browse-revision", url_args={"sha1_git": object_id}, query_params=query_dict + ) elif object_type == SNAPSHOT: - browse_url = reverse('browse-snapshot', - url_args={'snapshot_id': object_id}, - query_params=query_dict) + browse_url = reverse( + "browse-snapshot", + url_args={"snapshot_id": object_id}, + query_params=query_dict, + ) elif object_type == ORIGIN: - raise BadInputExc(('Origin PIDs (Persistent Identifiers) are not ' - 'publicly resolvable because they are for ' - 'internal usage only')) + raise BadInputExc( + ( + "Origin PIDs (Persistent Identifiers) are not " + "publicly resolvable because they are for " + "internal usage only" + ) + ) - return { - 'swh_id_parsed': swh_id_parsed, - 'browse_url': browse_url - } + return {"swh_id_parsed": swh_id_parsed, "browse_url": browse_url} def get_persistent_identifier(persistent_id: str) -> PersistentId: @@ -143,14 +160,14 @@ try: pid_object = parse_persistent_identifier(persistent_id) except ValidationError as ve: - raise BadInputExc('Error when parsing identifier: %s' % - ' '.join(ve.messages)) + raise BadInputExc("Error when parsing identifier: %s" % " ".join(ve.messages)) else: return pid_object -def group_swh_persistent_identifiers(persistent_ids: Iterable[PersistentId] - ) -> Dict[str, List[bytes]]: +def group_swh_persistent_identifiers( + persistent_ids: Iterable[PersistentId], +) -> Dict[str, List[bytes]]: """ Groups many Software Heritage persistent identifiers into a dictionary depending on their type. 
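To illustrate the grouping function whose docstring ends above (identifiers hypothetical; per the Dict[str, List[bytes]] signature, object ids come back as bytes):

    from swh.model.identifiers import parse_persistent_identifier
    from swh.web.common.identifiers import group_swh_persistent_identifiers

    pids = [
        parse_persistent_identifier(
            "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"
        ),
        parse_persistent_identifier(
            "swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6"
        ),
    ]
    grouped = group_swh_persistent_identifiers(pids)
    # grouped["content"] and grouped["revision"] should each hold one id
    # as bytes; the other object types map to empty lists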
@@ -169,7 +186,7 @@ DIRECTORY: [], REVISION: [], RELEASE: [], - SNAPSHOT: [] + SNAPSHOT: [], } for pid in persistent_ids: diff --git a/swh/web/common/middlewares.py b/swh/web/common/middlewares.py --- a/swh/web/common/middlewares.py +++ b/swh/web/common/middlewares.py @@ -20,12 +20,12 @@ def __call__(self, request): response = self.get_response(request) - if 'text/html' in response.get('Content-Type', ''): - if hasattr(response, 'content'): + if "text/html" in response.get("Content-Type", ""): + if hasattr(response, "content"): content = response.content response.content = prettify_html(content) - elif hasattr(response, 'streaming_content'): - content = b''.join(response.streaming_content) + elif hasattr(response, "streaming_content"): + content = b"".join(response.streaming_content) response.streaming_content = prettify_html(content) return response @@ -42,10 +42,10 @@ def __call__(self, request): response = self.get_response(request) - if 'text/html' in response.get('Content-Type', ''): + if "text/html" in response.get("Content-Type", ""): try: - minified_html = minify(response.content.decode('utf-8')) - response.content = minified_html.encode('utf-8') + minified_html = minify(response.content.decode("utf-8")) + response.content = minified_html.encode("utf-8") except Exception as exc: sentry_sdk.capture_exception(exc) return response @@ -62,10 +62,10 @@ def __call__(self, request): resp = self.get_response(request) - if 'RateLimit-Limit' in request.META: - resp['X-RateLimit-Limit'] = request.META['RateLimit-Limit'] - if 'RateLimit-Remaining' in request.META: - resp['X-RateLimit-Remaining'] = request.META['RateLimit-Remaining'] - if 'RateLimit-Reset' in request.META: - resp['X-RateLimit-Reset'] = request.META['RateLimit-Reset'] + if "RateLimit-Limit" in request.META: + resp["X-RateLimit-Limit"] = request.META["RateLimit-Limit"] + if "RateLimit-Remaining" in request.META: + resp["X-RateLimit-Remaining"] = request.META["RateLimit-Remaining"] + if "RateLimit-Reset" in request.META: + resp["X-RateLimit-Reset"] = request.META["RateLimit-Reset"] return resp diff --git a/swh/web/common/migrations/0001_initial.py b/swh/web/common/migrations/0001_initial.py --- a/swh/web/common/migrations/0001_initial.py +++ b/swh/web/common/migrations/0001_initial.py @@ -3,27 +3,25 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -# flake8: noqa - from __future__ import unicode_literals from django.db import migrations, models _authorized_origins = [ - 'https://github.com/', - 'https://gitlab.com/', - 'https://bitbucket.org/', - 'https://git.code.sf.net/', - 'http://git.code.sf.net/', - 'https://hg.code.sf.net/', - 'http://hg.code.sf.net/', - 'https://svn.code.sf.net/', - 'http://svn.code.sf.net/' + "https://github.com/", + "https://gitlab.com/", + "https://bitbucket.org/", + "https://git.code.sf.net/", + "http://git.code.sf.net/", + "https://hg.code.sf.net/", + "http://hg.code.sf.net/", + "https://svn.code.sf.net/", + "http://svn.code.sf.net/", ] def _populate_save_authorized_origins(apps, schema_editor): - SaveAuthorizedOrigin = apps.get_model('swh.web.common', 'SaveAuthorizedOrigin') + SaveAuthorizedOrigin = apps.get_model("swh.web.common", "SaveAuthorizedOrigin") for origin_url in _authorized_origins: SaveAuthorizedOrigin.objects.create(url=origin_url) @@ -34,39 +32,58 @@ operations = [ migrations.CreateModel( - name='SaveAuthorizedOrigin', + name="SaveAuthorizedOrigin", fields=[ - ('id', models.AutoField(auto_created=True, 
primary_key=True, serialize=False, verbose_name='ID')), - ('url', models.CharField(max_length=200)), + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("url", models.CharField(max_length=200)), ], - options={ - 'db_table': 'save_authorized_origin', - }, + options={"db_table": "save_authorized_origin",}, ), migrations.CreateModel( - name='SaveOriginRequest', + name="SaveOriginRequest", fields=[ - ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('request_date', models.DateTimeField(auto_now_add=True)), - ('origin_type', models.CharField(max_length=200)), - ('origin_url', models.CharField(max_length=200)), - ('status', models.TextField(choices=[('accepted', 'accepted'), ('rejected', 'rejected'), ('pending', 'pending')], default='pending')), - ('loading_task_id', models.IntegerField(default=-1)), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ("request_date", models.DateTimeField(auto_now_add=True)), + ("origin_type", models.CharField(max_length=200)), + ("origin_url", models.CharField(max_length=200)), + ( + "status", + models.TextField( + choices=[ + ("accepted", "accepted"), + ("rejected", "rejected"), + ("pending", "pending"), + ], + default="pending", + ), + ), + ("loading_task_id", models.IntegerField(default=-1)), ], - options={ - 'db_table': 'save_origin_request', - 'ordering': ['-id'], - }, + options={"db_table": "save_origin_request", "ordering": ["-id"],}, ), migrations.CreateModel( - name='SaveUnauthorizedOrigin', + name="SaveUnauthorizedOrigin", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('url', models.CharField(max_length=200)), + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("url", models.CharField(max_length=200)), ], - options={ - 'db_table': 'save_unauthorized_origin', - }, + options={"db_table": "save_unauthorized_origin",}, ), - migrations.RunPython(_populate_save_authorized_origins) + migrations.RunPython(_populate_save_authorized_origins), ] diff --git a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py b/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py --- a/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py +++ b/swh/web/common/migrations/0002_saveoriginrequest_visit_date.py @@ -11,13 +11,13 @@ class Migration(migrations.Migration): dependencies = [ - ('swh.web.common', '0001_initial'), + ("swh.web.common", "0001_initial"), ] operations = [ migrations.AddField( - model_name='saveoriginrequest', - name='visit_date', + model_name="saveoriginrequest", + name="visit_date", field=models.DateTimeField(null=True), ), ] diff --git a/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py b/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py --- a/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py +++ b/swh/web/common/migrations/0003_saveoriginrequest_loading_task_status.py @@ -3,14 +3,13 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -# flake8: noqa - from __future__ import unicode_literals from django.db import migrations, models from swh.web.config import scheduler + def _remove_archived_tasks_with_no_saved_status(apps, schema_editor): """ Scheduler tasks are archived on a regular basis so their completion @@ -19,7 +18,7 @@ So remove the rows 
associated to already archived tasks as the loading status can not be retrieved anymore. """ - SaveOriginRequest = apps.get_model('swh.web.common', 'SaveOriginRequest') + SaveOriginRequest = apps.get_model("swh.web.common", "SaveOriginRequest") no_saved_status_tasks = [] for sor in SaveOriginRequest.objects.all(): tasks = scheduler().get_tasks([sor.loading_task_id]) @@ -31,14 +30,23 @@ class Migration(migrations.Migration): dependencies = [ - ('swh.web.common', '0002_saveoriginrequest_visit_date'), + ("swh.web.common", "0002_saveoriginrequest_visit_date"), ] operations = [ migrations.AddField( - model_name='saveoriginrequest', - name='loading_task_status', - field=models.TextField(choices=[('not created', 'not created'), ('not yet scheduled', 'not yet scheduled'), ('scheduled', 'scheduled'), ('succeed', 'succeed'), ('failed', 'failed')], default='not created'), + model_name="saveoriginrequest", + name="loading_task_status", + field=models.TextField( + choices=[ + ("not created", "not created"), + ("not yet scheduled", "not yet scheduled"), + ("scheduled", "scheduled"), + ("succeed", "succeed"), + ("failed", "failed"), + ], + default="not created", + ), ), - migrations.RunPython(_remove_archived_tasks_with_no_saved_status) + migrations.RunPython(_remove_archived_tasks_with_no_saved_status), ] diff --git a/swh/web/common/migrations/0004_auto_20190204_1324.py b/swh/web/common/migrations/0004_auto_20190204_1324.py --- a/swh/web/common/migrations/0004_auto_20190204_1324.py +++ b/swh/web/common/migrations/0004_auto_20190204_1324.py @@ -3,8 +3,6 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -# flake8: noqa - from __future__ import unicode_literals from django.db import migrations, models @@ -13,13 +11,23 @@ class Migration(migrations.Migration): dependencies = [ - ('swh.web.common', '0003_saveoriginrequest_loading_task_status'), + ("swh.web.common", "0003_saveoriginrequest_loading_task_status"), ] operations = [ migrations.AlterField( - model_name='saveoriginrequest', - name='loading_task_status', - field=models.TextField(choices=[('not created', 'not created'), ('not yet scheduled', 'not yet scheduled'), ('scheduled', 'scheduled'), ('succeed', 'succeed'), ('failed', 'failed'), ('running', 'running')], default='not created'), + model_name="saveoriginrequest", + name="loading_task_status", + field=models.TextField( + choices=[ + ("not created", "not created"), + ("not yet scheduled", "not yet scheduled"), + ("scheduled", "scheduled"), + ("succeed", "succeed"), + ("failed", "failed"), + ("running", "running"), + ], + default="not created", + ), ), ] diff --git a/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py b/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py --- a/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py +++ b/swh/web/common/migrations/0005_remove_duplicated_authorized_origins.py @@ -12,17 +12,14 @@ def _remove_duplicated_urls_in_authorized_list(apps, schema_editor): sao = SaveAuthorizedOrigin.objects - for url in sao.values_list('url', flat=True).distinct(): - sao.filter(pk__in=sao.filter( - url=url).values_list('id', flat=True)[1:]).delete() + for url in sao.values_list("url", flat=True).distinct(): + sao.filter(pk__in=sao.filter(url=url).values_list("id", flat=True)[1:]).delete() class Migration(migrations.Migration): dependencies = [ - ('swh.web.common', '0004_auto_20190204_1324'), + ("swh.web.common", "0004_auto_20190204_1324"), ] - 
operations = [ - migrations.RunPython(_remove_duplicated_urls_in_authorized_list) - ] + operations = [migrations.RunPython(_remove_duplicated_urls_in_authorized_list)] diff --git a/swh/web/common/migrations/0006_rename_origin_type.py b/swh/web/common/migrations/0006_rename_origin_type.py --- a/swh/web/common/migrations/0006_rename_origin_type.py +++ b/swh/web/common/migrations/0006_rename_origin_type.py @@ -11,13 +11,13 @@ class Migration(migrations.Migration): dependencies = [ - ('swh.web.common', '0005_remove_duplicated_authorized_origins'), + ("swh.web.common", "0005_remove_duplicated_authorized_origins"), ] operations = [ migrations.RenameField( - model_name='saveoriginrequest', - old_name='origin_type', - new_name='visit_type', + model_name="saveoriginrequest", + old_name="origin_type", + new_name="visit_type", ), ] diff --git a/swh/web/common/models.py b/swh/web/common/models.py --- a/swh/web/common/models.py +++ b/swh/web/common/models.py @@ -10,11 +10,12 @@ """ Model table holding origin urls authorized to be loaded into the archive. """ + url = models.CharField(max_length=200, null=False) class Meta: - app_label = 'swh.web.common' - db_table = 'save_authorized_origin' + app_label = "swh.web.common" + db_table = "save_authorized_origin" def __str__(self): return self.url @@ -25,32 +26,33 @@ Model table holding origin urls not authorized to be loaded into the archive. """ + url = models.CharField(max_length=200, null=False) class Meta: - app_label = 'swh.web.common' - db_table = 'save_unauthorized_origin' + app_label = "swh.web.common" + db_table = "save_unauthorized_origin" def __str__(self): return self.url -SAVE_REQUEST_ACCEPTED = 'accepted' -SAVE_REQUEST_REJECTED = 'rejected' -SAVE_REQUEST_PENDING = 'pending' +SAVE_REQUEST_ACCEPTED = "accepted" +SAVE_REQUEST_REJECTED = "rejected" +SAVE_REQUEST_PENDING = "pending" SAVE_REQUEST_STATUS = [ (SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_ACCEPTED), (SAVE_REQUEST_REJECTED, SAVE_REQUEST_REJECTED), - (SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING) + (SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING), ] -SAVE_TASK_NOT_CREATED = 'not created' -SAVE_TASK_NOT_YET_SCHEDULED = 'not yet scheduled' -SAVE_TASK_SCHEDULED = 'scheduled' -SAVE_TASK_SUCCEED = 'succeed' -SAVE_TASK_FAILED = 'failed' -SAVE_TASK_RUNNING = 'running' +SAVE_TASK_NOT_CREATED = "not created" +SAVE_TASK_NOT_YET_SCHEDULED = "not yet scheduled" +SAVE_TASK_SCHEDULED = "scheduled" +SAVE_TASK_SUCCEED = "succeed" +SAVE_TASK_FAILED = "failed" +SAVE_TASK_RUNNING = "running" SAVE_TASK_STATUS = [ (SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_CREATED), @@ -58,7 +60,7 @@ (SAVE_TASK_SCHEDULED, SAVE_TASK_SCHEDULED), (SAVE_TASK_SUCCEED, SAVE_TASK_SUCCEED), (SAVE_TASK_FAILED, SAVE_TASK_FAILED), - (SAVE_TASK_RUNNING, SAVE_TASK_RUNNING) + (SAVE_TASK_RUNNING, SAVE_TASK_RUNNING), ] @@ -66,27 +68,32 @@ """ Model table holding all the save origin requests issued by users. 
""" + id = models.BigAutoField(primary_key=True) request_date = models.DateTimeField(auto_now_add=True) visit_type = models.CharField(max_length=200, null=False) origin_url = models.CharField(max_length=200, null=False) - status = models.TextField(choices=SAVE_REQUEST_STATUS, - default=SAVE_REQUEST_PENDING) + status = models.TextField(choices=SAVE_REQUEST_STATUS, default=SAVE_REQUEST_PENDING) loading_task_id = models.IntegerField(default=-1) visit_date = models.DateTimeField(null=True) - loading_task_status = models.TextField(choices=SAVE_TASK_STATUS, - default=SAVE_TASK_NOT_CREATED) + loading_task_status = models.TextField( + choices=SAVE_TASK_STATUS, default=SAVE_TASK_NOT_CREATED + ) class Meta: - app_label = 'swh.web.common' - db_table = 'save_origin_request' - ordering = ['-id'] + app_label = "swh.web.common" + db_table = "save_origin_request" + ordering = ["-id"] def __str__(self): - return str({'id': self.id, - 'request_date': self.request_date, - 'visit_type': self.visit_type, - 'origin_url': self.origin_url, - 'status': self.status, - 'loading_task_id': self.loading_task_id, - 'visit_date': self.visit_date}) + return str( + { + "id": self.id, + "request_date": self.request_date, + "visit_type": self.visit_type, + "origin_url": self.origin_url, + "status": self.status, + "loading_task_id": self.loading_task_id, + "visit_date": self.visit_date, + } + ) diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -23,11 +23,18 @@ from swh.web.common import service from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc from swh.web.common.models import ( - SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, - SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, - SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, - SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, - SAVE_TASK_NOT_CREATED + SaveUnauthorizedOrigin, + SaveAuthorizedOrigin, + SaveOriginRequest, + SAVE_REQUEST_ACCEPTED, + SAVE_REQUEST_REJECTED, + SAVE_REQUEST_PENDING, + SAVE_TASK_NOT_YET_SCHEDULED, + SAVE_TASK_SCHEDULED, + SAVE_TASK_SUCCEED, + SAVE_TASK_FAILED, + SAVE_TASK_RUNNING, + SAVE_TASK_NOT_CREATED, ) from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import parse_timestamp, SWH_WEB_METRICS_REGISTRY @@ -47,8 +54,7 @@ Returns: list: The list of authorized origin url prefix """ - return [origin.url - for origin in SaveAuthorizedOrigin.objects.all()] + return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls(): @@ -59,8 +65,7 @@ Returns: list: the list of unauthorized origin url prefix """ - return [origin.url - for origin in SaveUnauthorizedOrigin.objects.all()] + return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url): @@ -96,19 +101,15 @@ # map visit type to scheduler task # TODO: do not hardcode the task name here (T1157) -_visit_type_task = { - 'git': 'load-git', - 'hg': 'load-hg', - 'svn': 'load-svn' -} +_visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"} # map scheduler task status to origin save status _save_task_status = { - 'next_run_not_scheduled': SAVE_TASK_NOT_YET_SCHEDULED, - 'next_run_scheduled': SAVE_TASK_SCHEDULED, - 'completed': SAVE_TASK_SUCCEED, - 'disabled': SAVE_TASK_FAILED + "next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED, + "next_run_scheduled": SAVE_TASK_SCHEDULED, + "completed": SAVE_TASK_SUCCEED, + 
"disabled": SAVE_TASK_FAILED, } @@ -124,38 +125,39 @@ Returns: list: the list of saveable visit types """ - allowed_visit_types = ', '.join(get_savable_visit_types()) + allowed_visit_types = ", ".join(get_savable_visit_types()) if visit_type not in _visit_type_task: - raise BadInputExc('Visit of type %s can not be saved! ' - 'Allowed types are the following: %s' % - (visit_type, allowed_visit_types)) + raise BadInputExc( + "Visit of type %s can not be saved! " + "Allowed types are the following: %s" % (visit_type, allowed_visit_types) + ) -_validate_url = URLValidator(schemes=['http', 'https', 'svn', 'git']) +_validate_url = URLValidator(schemes=["http", "https", "svn", "git"]) def _check_origin_url_valid(origin_url): try: _validate_url(origin_url) except ValidationError: - raise BadInputExc('The provided origin url (%s) is not valid!' % - escape(origin_url)) + raise BadInputExc( + "The provided origin url (%s) is not valid!" % escape(origin_url) + ) def _get_visit_info_for_save_request(save_request): visit_date = None visit_status = None try: - origin = {'url': save_request.origin_url} + origin = {"url": save_request.origin_url} origin_info = service.lookup_origin(origin) origin_visits = get_origin_visits(origin_info) - visit_dates = [parse_timestamp(v['date']) - for v in origin_visits] + visit_dates = [parse_timestamp(v["date"]) for v in origin_visits] i = bisect_right(visit_dates, save_request.request_date) if i != len(visit_dates): visit_date = visit_dates[i] - visit_status = origin_visits[i]['status'] - if origin_visits[i]['status'] == 'ongoing': + visit_status = origin_visits[i]["status"] + if origin_visits[i]["status"] == "ongoing": visit_date = None except Exception as exc: sentry_sdk.capture_exception(exc) @@ -168,7 +170,7 @@ # visit has been performed, mark the saving task as succeed if visit_date and visit_status is not None: save_task_status = SAVE_TASK_SUCCEED - elif visit_status == 'ongoing': + elif visit_status == "ongoing": save_task_status = SAVE_TASK_RUNNING else: time_now = datetime.now(tz=timezone.utc) @@ -185,29 +187,28 @@ visit_date = save_request.visit_date # save task still in scheduler db if task: - save_task_status = _save_task_status[task['status']] + save_task_status = _save_task_status[task["status"]] # Consider request from which a visit date has already been found # as succeeded to avoid retrieving it again if save_task_status == SAVE_TASK_SCHEDULED and visit_date: save_task_status = SAVE_TASK_SUCCEED - if save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEED) \ - and not visit_date: + if save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEED) and not visit_date: visit_date, _ = _get_visit_info_for_save_request(save_request) save_request.visit_date = visit_date must_save = True # Check tasks still marked as scheduled / not yet scheduled - if save_task_status in (SAVE_TASK_SCHEDULED, - SAVE_TASK_NOT_YET_SCHEDULED): + if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( - save_request, save_task_status) + save_request, save_task_status + ) # save task may have been archived else: save_task_status = save_request.loading_task_status - if save_task_status in (SAVE_TASK_SCHEDULED, - SAVE_TASK_NOT_YET_SCHEDULED): + if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( - save_request, save_task_status) + save_request, save_task_status + ) else: save_task_status = save_request.loading_task_status @@ 
-219,13 +220,15 @@
     if must_save:
         save_request.save()

-    return {'id': save_request.id,
-            'visit_type': save_request.visit_type,
-            'origin_url': save_request.origin_url,
-            'save_request_date': save_request.request_date.isoformat(),
-            'save_request_status': save_request.status,
-            'save_task_status': save_task_status,
-            'visit_date': visit_date.isoformat() if visit_date else None}
+    return {
+        "id": save_request.id,
+        "visit_type": save_request.visit_type,
+        "origin_url": save_request.origin_url,
+        "save_request_date": save_request.request_date.isoformat(),
+        "save_request_status": save_request.status,
+        "save_task_status": save_task_status,
+        "visit_date": visit_date.isoformat() if visit_date else None,
+    }


 def create_save_origin_request(visit_type, origin_url):
@@ -276,14 +279,16 @@
     if save_request_status == SAVE_REQUEST_ACCEPTED:
         # create a task with high priority
         kwargs = {
-            'priority': 'high',
-            'url': origin_url,
+            "priority": "high",
+            "url": origin_url,
         }
         sor = None
         # get the list of previously submitted save requests
-        current_sors = \
-            list(SaveOriginRequest.objects.filter(visit_type=visit_type,
-                                                  origin_url=origin_url))
+        current_sors = list(
+            SaveOriginRequest.objects.filter(
+                visit_type=visit_type, origin_url=origin_url
+            )
+        )

         can_create_task = False
         # if no save requests previously submitted, create the scheduler task
@@ -301,11 +306,10 @@
             # get the scheduler task and its status
             tasks = scheduler.get_tasks([sor.loading_task_id])
             task = tasks[0] if tasks else None
-            task_status = _save_request_dict(sor, task)['save_task_status']
+            task_status = _save_request_dict(sor, task)["save_task_status"]
             # create a new scheduler task only if the previous one has
             # already been executed
-            if task_status == SAVE_TASK_FAILED or \
-                    task_status == SAVE_TASK_SUCCEED:
+            if task_status == SAVE_TASK_FAILED or task_status == SAVE_TASK_SUCCEED:
                 can_create_task = True
                 sor = None
             else:
@@ -313,43 +317,48 @@
         if can_create_task:
             # effectively create the scheduler task
-            task_dict = create_oneshot_task_dict(
-                _visit_type_task[visit_type], **kwargs)
+            task_dict = create_oneshot_task_dict(_visit_type_task[visit_type], **kwargs)
             task = scheduler.create_tasks([task_dict])[0]

             # pending save request has been accepted
             if sor:
                 sor.status = SAVE_REQUEST_ACCEPTED
-                sor.loading_task_id = task['id']
+                sor.loading_task_id = task["id"]
                 sor.save()
             else:
-                sor = SaveOriginRequest.objects.create(visit_type=visit_type,
-                                                       origin_url=origin_url,
-                                                       status=save_request_status,  # noqa
-                                                       loading_task_id=task['id'])  # noqa
+                sor = SaveOriginRequest.objects.create(
+                    visit_type=visit_type,
+                    origin_url=origin_url,
+                    status=save_request_status,
+                    loading_task_id=task["id"],
+                )
     # save request must be manually reviewed for acceptance
     elif save_request_status == SAVE_REQUEST_PENDING:
         # check if such a save request has already been submitted,
         # no need to add it to the database in that case
         try:
-            sor = SaveOriginRequest.objects.get(visit_type=visit_type,
-                                                origin_url=origin_url,
-                                                status=save_request_status)
+            sor = SaveOriginRequest.objects.get(
+                visit_type=visit_type, origin_url=origin_url, status=save_request_status
+            )
         # if not, add it to the database
         except ObjectDoesNotExist:
-            sor = SaveOriginRequest.objects.create(visit_type=visit_type,
-                                                   origin_url=origin_url,
-                                                   status=save_request_status)
+            sor = SaveOriginRequest.objects.create(
+                visit_type=visit_type, origin_url=origin_url, status=save_request_status
+            )
     # origin cannot be saved as its url is blacklisted,
     # log the request to the database anyway
     else:
-        sor = 
SaveOriginRequest.objects.create(visit_type=visit_type, - origin_url=origin_url, - status=save_request_status) + sor = SaveOriginRequest.objects.create( + visit_type=visit_type, origin_url=origin_url, status=save_request_status + ) if save_request_status == SAVE_REQUEST_REJECTED: - raise ForbiddenExc(('The "save code now" request has been rejected ' - 'because the provided origin url is blacklisted.')) + raise ForbiddenExc( + ( + 'The "save code now" request has been rejected ' + "because the provided origin url is blacklisted." + ) + ) return _save_request_dict(sor, task) @@ -372,7 +381,7 @@ save_requests = [] if task_ids: tasks = scheduler.get_tasks(task_ids) - tasks = {task['id']: task for task in tasks} + tasks = {task["id"]: task for task in tasks} for sor in requests_queryset: sr_dict = _save_request_dict(sor, tasks.get(sor.loading_task_id)) save_requests.append(sr_dict) @@ -398,12 +407,14 @@ """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) - sors = SaveOriginRequest.objects.filter(visit_type=visit_type, - origin_url=origin_url) + sors = SaveOriginRequest.objects.filter( + visit_type=visit_type, origin_url=origin_url + ) if sors.count() == 0: - raise NotFoundExc(('No save requests found for visit of type ' - '%s on origin with url %s.') - % (visit_type, origin_url)) + raise NotFoundExc( + ("No save requests found for visit of type " "%s on origin with url %s.") + % (visit_type, origin_url) + ) return get_save_origin_requests_from_queryset(sors) @@ -448,21 +459,21 @@ if task is None: return {} - task_run = scheduler.get_task_runs([task['id']]) + task_run = scheduler.get_task_runs([task["id"]]) task_run = task_run[0] if task_run else None if task_run is None: return {} - task_run['type'] = task['type'] - task_run['arguments'] = task['arguments'] - task_run['id'] = task_run['task'] - del task_run['task'] - del task_run['metadata'] - del task_run['started'] - - es_workers_index_url = config.get_config()['es_workers_index_url'] + task_run["type"] = task["type"] + task_run["arguments"] = task["arguments"] + task_run["id"] = task_run["task"] + del task_run["task"] + del task_run["metadata"] + del task_run["started"] + + es_workers_index_url = config.get_config()["es_workers_index_url"] if not es_workers_index_url: return task_run - es_workers_index_url += '/_search' + es_workers_index_url += "/_search" if save_request.visit_date: min_ts = save_request.visit_date @@ -473,95 +484,94 @@ min_ts = int(min_ts.timestamp()) * 1000 max_ts = int(max_ts.timestamp()) * 1000 - save_task_status = _save_task_status[task['status']] - priority = '3' if save_task_status == SAVE_TASK_FAILED else '6' + save_task_status = _save_task_status[task["status"]] + priority = "3" if save_task_status == SAVE_TASK_FAILED else "6" query = { - 'bool': { - 'must': [ - { - 'match_phrase': { - 'priority': { - 'query': priority - } - } - }, + "bool": { + "must": [ + {"match_phrase": {"priority": {"query": priority}}}, + {"match_phrase": {"swh_task_id": {"query": task_run["backend_id"]}}}, { - 'match_phrase': { - 'swh_task_id': { - 'query': task_run['backend_id'] + "range": { + "@timestamp": { + "gte": min_ts, + "lte": max_ts, + "format": "epoch_millis", } } }, - { - 'range': { - '@timestamp': { - 'gte': min_ts, - 'lte': max_ts, - 'format': 'epoch_millis' - } - } - } ] } } try: - response = requests.post(es_workers_index_url, - json={'query': query, - 'sort': ['@timestamp']}, - timeout=30) + response = requests.post( + es_workers_index_url, + json={"query": query, "sort": ["@timestamp"]}, + 
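# Hedged sketch of the Elasticsearch request built above: a "bool" query whose
# "must" clauses combine an exact "match_phrase" filter with an epoch-millis
# "range" on @timestamp, posted to the index's _search endpoint. The URL and
# field values are placeholders, not the production configuration.
import requests

def search_worker_logs(es_url: str, task_backend_id: str, min_ts: int, max_ts: int):
    query = {
        "bool": {
            "must": [
                {"match_phrase": {"swh_task_id": {"query": task_backend_id}}},
                {
                    "range": {
                        "@timestamp": {
                            "gte": min_ts,
                            "lte": max_ts,
                            "format": "epoch_millis",
                        }
                    }
                },
            ]
        }
    }
    response = requests.post(
        es_url + "/_search", json={"query": query, "sort": ["@timestamp"]}, timeout=30
    )
    return response.json()["hits"]["hits"]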
timeout=30, + ) results = json.loads(response.text) - if results['hits']['total']['value'] >= 1: - task_run_info = results['hits']['hits'][-1]['_source'] - if 'swh_logging_args_runtime' in task_run_info: - duration = task_run_info['swh_logging_args_runtime'] - task_run['duration'] = duration - if 'message' in task_run_info: - task_run['message'] = task_run_info['message'] - if 'swh_logging_args_name' in task_run_info: - task_run['name'] = task_run_info['swh_logging_args_name'] - elif 'swh_task_name' in task_run_info: - task_run['name'] = task_run_info['swh_task_name'] - if 'hostname' in task_run_info: - task_run['worker'] = task_run_info['hostname'] - elif 'host' in task_run_info: - task_run['worker'] = task_run_info['host'] + if results["hits"]["total"]["value"] >= 1: + task_run_info = results["hits"]["hits"][-1]["_source"] + if "swh_logging_args_runtime" in task_run_info: + duration = task_run_info["swh_logging_args_runtime"] + task_run["duration"] = duration + if "message" in task_run_info: + task_run["message"] = task_run_info["message"] + if "swh_logging_args_name" in task_run_info: + task_run["name"] = task_run_info["swh_logging_args_name"] + elif "swh_task_name" in task_run_info: + task_run["name"] = task_run_info["swh_task_name"] + if "hostname" in task_run_info: + task_run["worker"] = task_run_info["hostname"] + elif "host" in task_run_info: + task_run["worker"] = task_run_info["host"] except Exception as exc: - logger.warning('Request to Elasticsearch failed\n%s', exc) + logger.warning("Request to Elasticsearch failed\n%s", exc) sentry_sdk.capture_exception(exc) return task_run -SUBMITTED_SAVE_REQUESTS_METRIC = 'swh_web_submitted_save_requests' +SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests" _submitted_save_requests_gauge = Gauge( name=SUBMITTED_SAVE_REQUESTS_METRIC, - documentation='Number of submitted origin save requests', - labelnames=['status', 'visit_type'], - registry=SWH_WEB_METRICS_REGISTRY) + documentation="Number of submitted origin save requests", + labelnames=["status", "visit_type"], + registry=SWH_WEB_METRICS_REGISTRY, +) -ACCEPTED_SAVE_REQUESTS_METRIC = 'swh_web_accepted_save_requests' +ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests" _accepted_save_requests_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_METRIC, - documentation='Number of accepted origin save requests', - labelnames=['load_task_status', 'visit_type'], - registry=SWH_WEB_METRICS_REGISTRY) + documentation="Number of accepted origin save requests", + labelnames=["load_task_status", "visit_type"], + registry=SWH_WEB_METRICS_REGISTRY, +) def compute_save_requests_metrics(): """Compute a couple of Prometheus metrics related to origin save requests""" - request_statuses = (SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, - SAVE_REQUEST_PENDING) - - load_task_statuses = (SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, - SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEED, - SAVE_TASK_FAILED, SAVE_TASK_RUNNING) + request_statuses = ( + SAVE_REQUEST_ACCEPTED, + SAVE_REQUEST_REJECTED, + SAVE_REQUEST_PENDING, + ) + + load_task_statuses = ( + SAVE_TASK_NOT_CREATED, + SAVE_TASK_NOT_YET_SCHEDULED, + SAVE_TASK_SCHEDULED, + SAVE_TASK_SUCCEED, + SAVE_TASK_FAILED, + SAVE_TASK_RUNNING, + ) visit_types = get_savable_visit_types() @@ -578,8 +588,9 @@ for sor in SaveOriginRequest.objects.all(): if sor.status == SAVE_REQUEST_ACCEPTED: _accepted_save_requests_gauge.labels( - load_task_status=sor.loading_task_status, - visit_type=sor.visit_type).inc() + load_task_status=sor.loading_task_status, 
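# Sketch of the prometheus_client pattern used for the gauges defined below: a
# single Gauge with labelnames yields one time series per label combination,
# and .labels(...).inc() selects the series to bump. The metric name mirrors
# the patch; the sample values are illustrative.
from prometheus_client import CollectorRegistry, Gauge, generate_latest

registry = CollectorRegistry(auto_describe=True)
submitted = Gauge(
    name="swh_web_submitted_save_requests",
    documentation="Number of submitted origin save requests",
    labelnames=["status", "visit_type"],
    registry=registry,
)
submitted.labels(status="accepted", visit_type="git").inc()
print(generate_latest(registry).decode())  # Prometheus text exposition format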
visit_type=sor.visit_type + ).inc() _submitted_save_requests_gauge.labels( - status=sor.status, visit_type=sor.visit_type).inc() + status=sor.status, visit_type=sor.visit_type + ).inc() diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -36,21 +36,22 @@ from swh.web.common import service - if 'url' in origin_info: - origin_url = origin_info['url'] + if "url" in origin_info: + origin_url = origin_info["url"] else: - origin_url = service.lookup_origin(origin_info)['url'] + origin_url = service.lookup_origin(origin_info)["url"] - cache_entry_id = 'origin_visits_%s' % origin_url + cache_entry_id = "origin_visits_%s" % origin_url cache_entry = cache.get(cache_entry_id) if cache_entry: - last_visit = cache_entry[-1]['visit'] - new_visits = list(service.lookup_origin_visits(origin_url, - last_visit=last_visit)) + last_visit = cache_entry[-1]["visit"] + new_visits = list( + service.lookup_origin_visits(origin_url, last_visit=last_visit) + ) if not new_visits: last_snp = service.lookup_latest_origin_snapshot(origin_url) - if not last_snp or last_snp['id'] == cache_entry[-1]['snapshot']: + if not last_snp or last_snp["id"] == cache_entry[-1]["snapshot"]: return cache_entry origin_visits = [] @@ -58,9 +59,11 @@ per_page = service.MAX_LIMIT last_visit = None while 1: - visits = list(service.lookup_origin_visits(origin_url, - last_visit=last_visit, - per_page=per_page)) + visits = list( + service.lookup_origin_visits( + origin_url, last_visit=last_visit, per_page=per_page + ) + ) origin_visits += visits if len(visits) < per_page: break @@ -71,14 +74,13 @@ last_visit += per_page def _visit_sort_key(visit): - ts = parse_timestamp(visit['date']).timestamp() - return ts + (float(visit['visit']) / 10e3) + ts = parse_timestamp(visit["date"]).timestamp() + return ts + (float(visit["visit"]) / 10e3) for v in origin_visits: - if 'metadata' in v: - del v['metadata'] - origin_visits = [dict(t) for t in set([tuple(d.items()) - for d in origin_visits])] + if "metadata" in v: + del v["metadata"] + origin_visits = [dict(t) for t in set([tuple(d.items()) for d in origin_visits])] origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) @@ -86,8 +88,7 @@ return origin_visits -def get_origin_visit(origin_info, visit_ts=None, visit_id=None, - snapshot_id=None): +def get_origin_visit(origin_info, visit_ts=None, visit_id=None, snapshot_id=None): """Function that returns information about a visit for a given origin. The visit is retrieved from a provided timestamp. @@ -110,29 +111,36 @@ visits = get_origin_visits(origin_info) if not visits: - raise NotFoundExc(('No visit associated to origin with' - ' url %s!' % origin_info['url'])) + raise NotFoundExc( + ("No visit associated to origin with" " url %s!" % origin_info["url"]) + ) if snapshot_id: - visit = [v for v in visits if v['snapshot'] == snapshot_id] + visit = [v for v in visits if v["snapshot"] == snapshot_id] if len(visit) == 0: - raise NotFoundExc(('Visit for snapshot with id %s for origin with' - ' url %s not found!' % - (snapshot_id, origin_info['url']))) + raise NotFoundExc( + ( + "Visit for snapshot with id %s for origin with" + " url %s not found!" 
% (snapshot_id, origin_info["url"]) + ) + ) return visit[0] if visit_id: - visit = [v for v in visits if v['visit'] == int(visit_id)] + visit = [v for v in visits if v["visit"] == int(visit_id)] if len(visit) == 0: - raise NotFoundExc(('Visit with id %s for origin with' - ' url %s not found!' % - (visit_id, origin_info['url']))) + raise NotFoundExc( + ( + "Visit with id %s for origin with" + " url %s not found!" % (visit_id, origin_info["url"]) + ) + ) return visit[0] if not visit_ts: # returns the latest full visit when no timestamp is provided for v in reversed(visits): - if v['status'] == 'full': + if v["status"] == "full": return v return visits[-1] @@ -140,20 +148,28 @@ # Find the visit with date closest to the target (in absolute value) (abs_time_delta, visit_idx) = min( - ((math.floor(parse_timestamp(visit['date']).timestamp()), i) - for (i, visit) in enumerate(visits)), - key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts)) + ( + (math.floor(parse_timestamp(visit["date"]).timestamp()), i) + for (i, visit) in enumerate(visits) + ), + key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts), + ) if visit_idx is not None: visit = visits[visit_idx] # If multiple visits have the same date, select the one with # the largest id. - while visit_idx < len(visits) - 1 and \ - visit['date'] == visits[visit_idx+1]['date']: + while ( + visit_idx < len(visits) - 1 + and visit["date"] == visits[visit_idx + 1]["date"] + ): visit_idx = visit_idx + 1 visit = visits[visit_idx] return visit else: - raise NotFoundExc(('Visit with timestamp %s for origin with ' - 'url %s not found!' % - (visit_ts, origin_info['url']))) + raise NotFoundExc( + ( + "Visit with timestamp %s for origin with " + "url %s not found!" % (visit_ts, origin_info["url"]) + ) + ) diff --git a/swh/web/common/query.py b/swh/web/common/query.py --- a/swh/web/common/query.py +++ b/swh/web/common/query.py @@ -12,8 +12,8 @@ from swh.web.common.exc import BadInputExc -SHA256_RE = re.compile(r'^[0-9a-f]{64}$', re.IGNORECASE) -SHA1_RE = re.compile(r'^[0-9a-f]{40}$', re.IGNORECASE) +SHA256_RE = re.compile(r"^[0-9a-f]{64}$", re.IGNORECASE) +SHA1_RE = re.compile(r"^[0-9a-f]{40}$", re.IGNORECASE) def parse_hash(q): @@ -32,22 +32,24 @@ hash value """ + def guess_algo(q): if SHA1_RE.match(q): - return 'sha1' + return "sha1" elif SHA256_RE.match(q): - return 'sha256' + return "sha256" else: - raise BadInputExc('Invalid checksum query string %s' % q) + raise BadInputExc("Invalid checksum query string %s" % q) def check_algo(algo, hex): - if (algo in {'sha1', 'sha1_git'} and not SHA1_RE.match(hex)) \ - or (algo == 'sha256' and not SHA256_RE.match(hex)): - raise BadInputExc('Invalid hash %s for algorithm %s' % (hex, algo)) + if (algo in {"sha1", "sha1_git"} and not SHA1_RE.match(hex)) or ( + algo == "sha256" and not SHA256_RE.match(hex) + ): + raise BadInputExc("Invalid hash %s for algorithm %s" % (hex, algo)) - parts = q.split(':') + parts = q.split(":") if len(parts) > 2: - raise BadInputExc('Invalid checksum query string %s' % q) + raise BadInputExc("Invalid checksum query string %s" % q) elif len(parts) == 1: parts = (guess_algo(q), q) elif len(parts) == 2: @@ -55,7 +57,7 @@ algo = parts[0] if algo not in ALGORITHMS: - raise BadInputExc('Unknown hash algorithm %s' % algo) + raise BadInputExc("Unknown hash algorithm %s" % algo) return (algo, hash_to_bytes(parts[1])) diff --git a/swh/web/common/service.py b/swh/web/common/service.py --- a/swh/web/common/service.py +++ b/swh/web/common/service.py @@ -14,9 +14,7 @@ from swh.storage.algos import diff, 
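# Usage sketch for parse_hash above: a bare hex digest has its algorithm
# guessed from its length (40 chars -> sha1, 64 chars -> sha256), while an
# explicit "algo:hex" prefix is validated against the supported algorithms.
# The digests below are illustrative values, not real archive checksums.
from swh.web.common.query import parse_hash

algo, digest = parse_hash("0123456789abcdef0123456789abcdef01234567")
assert algo == "sha1" and len(digest) == 20  # digest is returned as bytes
algo, digest = parse_hash("sha256:" + "ab" * 32)
assert algo == "sha256" and len(digest) == 32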
revisions_walker -from swh.model.identifiers import ( - CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT -) +from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT from swh.web.common import converters from swh.web.common import query from swh.web.common.exc import BadInputExc, NotFoundExc @@ -49,14 +47,14 @@ the hash is present in storage, elem['found'] = false if not. """ - hashlist = [hashutil.hash_to_bytes(elem['sha1']) for elem in hashes] + hashlist = [hashutil.hash_to_bytes(elem["sha1"]) for elem in hashes] content_missing = storage.content_missing_per_sha1(hashlist) missing = [hashutil.hash_to_hex(x) for x in content_missing] for x in hashes: - x.update({'found': True}) + x.update({"found": True}) for h in hashes: - if h['sha1'] in missing: - h['found'] = False + if h["sha1"] in missing: + h["found"] = False return hashes @@ -75,13 +73,13 @@ """ limit = min(per_page, MAX_LIMIT) - ctags = idx_storage.content_ctags_search(expression, - last_sha1=last_sha1, - limit=limit) + ctags = idx_storage.content_ctags_search( + expression, last_sha1=last_sha1, limit=limit + ) for ctag in ctags: - ctag = converters.from_swh(ctag, hashess={'id'}) - ctag['sha1'] = ctag['id'] - ctag.pop('id') + ctag = converters.from_swh(ctag, hashess={"id"}) + ctag["sha1"] = ctag["id"] + ctag.pop("id") yield ctag @@ -96,8 +94,7 @@ """ algo, hash = query.parse_hash(q) found = _first_element(storage.content_find({algo: hash})) - return {'found': converters.from_content(found), - 'algo': algo} + return {"found": converters.from_content(found), "algo": algo} def search_hash(q): @@ -111,7 +108,7 @@ """ algo, hash = query.parse_hash(q) found = _first_element(storage.content_find({algo: hash})) - return {'found': found is not None} + return {"found": found is not None} def _lookup_content_sha1(q): @@ -125,11 +122,11 @@ """ algo, hash = query.parse_hash(q) - if algo != 'sha1': + if algo != "sha1": hashes = _first_element(storage.content_find({algo: hash})) if not hashes: return None - return hashes['sha1'] + return hashes["sha1"] return hash @@ -153,7 +150,7 @@ return None for ctag in ctags: - yield converters.from_swh(ctag, hashess={'id'}) + yield converters.from_swh(ctag, hashess={"id"}) def lookup_content_filetype(q): @@ -191,7 +188,7 @@ lang = _first_element(list(idx_storage.content_language_get([sha1]))) if not lang: return None - return converters.from_swh(lang, hashess={'id'}) + return converters.from_swh(lang, hashess={"id"}) def lookup_content_license(q): @@ -211,8 +208,7 @@ if not lic: return None - return converters.from_swh({'id': sha1, 'facts': lic[sha1]}, - hashess={'id'}) + return converters.from_swh({"id": sha1, "facts": lic[sha1]}, hashess={"id"}) def lookup_origin(origin: Dict[str, str]) -> Dict[str, str]: @@ -226,22 +222,22 @@ """ origins = [origin] - if origin['url']: + if origin["url"]: # handle case when user provided an origin url with a trailing # slash while the url in storage does not have it (e.g. GitHub) - if origin['url'].endswith('/'): - origins.append({'url': origin['url'][:-1]}) + if origin["url"].endswith("/"): + origins.append({"url": origin["url"][:-1]}) # handle case when user provided an origin url without a trailing # slash while the url in storage have it (e.g. Debian source package) else: - origins.append({'url': f"{origin['url']}/"}) + origins.append({"url": f"{origin['url']}/"}) # Check all possible origin urls for orig in origins: origin_info = storage.origin_get(orig) if origin_info: break if not origin_info: - msg = 'Origin with url %s not found!' 
% origin['url'] + msg = "Origin with url %s not found!" % origin["url"] raise NotFoundExc(msg) return converters.from_origin(origin_info) @@ -276,26 +272,28 @@ """ if search: - results = search.origin_search(url_pattern=url_pattern, count=limit, - page_token=page_token, - with_visit=with_visit) - origins = list(map(converters.from_origin, results['results'])) - return (origins, results['next_page_token']) + results = search.origin_search( + url_pattern=url_pattern, + count=limit, + page_token=page_token, + with_visit=with_visit, + ) + origins = list(map(converters.from_origin, results["results"])) + return (origins, results["next_page_token"]) else: # Fallback to swh-storage if swh-search is not configured offset = int(page_token) if page_token else 0 regexp = True search_words = [re.escape(word) for word in url_pattern.split()] if len(search_words) >= 7: - url_pattern = '.*'.join(search_words) + url_pattern = ".*".join(search_words) else: pattern_parts = [] for permut in itertools.permutations(search_words): - pattern_parts.append('.*'.join(permut)) - url_pattern = '|'.join(pattern_parts) + pattern_parts.append(".*".join(permut)) + url_pattern = "|".join(pattern_parts) - origins = storage.origin_search(url_pattern, offset, limit, regexp, - with_visit) + origins = storage.origin_search(url_pattern, offset, limit, regexp, with_visit) origins = list(map(converters.from_origin, origins)) if len(origins) >= limit: page_token = str(offset + len(origins)) @@ -317,18 +315,19 @@ """ matches = idx_storage.origin_intrinsic_metadata_search_fulltext( - conjunction=[fulltext], limit=limit) + conjunction=[fulltext], limit=limit + ) results = [] for match in matches: - match['from_revision'] = hashutil.hash_to_hex(match['from_revision']) + match["from_revision"] = hashutil.hash_to_hex(match["from_revision"]) - origin = storage.origin_get({'url': match['id']}) - del match['id'] + origin = storage.origin_get({"url": match["id"]}) + del match["id"] result = converters.from_origin(origin) if result: - result['metadata'] = match + result["metadata"] = match results.append(result) return results @@ -347,29 +346,27 @@ """ origin_info = storage.origin_get(origin_dict) if not origin_info: - msg = 'Origin with url %s not found!' % origin_dict['url'] + msg = "Origin with url %s not found!" % origin_dict["url"] raise NotFoundExc(msg) - origins = [origin_info['url']] - match = _first_element( - idx_storage.origin_intrinsic_metadata_get(origins)) + origins = [origin_info["url"]] + match = _first_element(idx_storage.origin_intrinsic_metadata_get(origins)) result = {} if match: - result = match['metadata'] + result = match["metadata"] return result def _to_sha1_bin(sha1_hex): _, sha1_git_bin = query.parse_hash_with_algorithms_or_throws( - sha1_hex, - ['sha1'], # HACK: sha1_git really - 'Only sha1_git is supported.') + sha1_hex, ["sha1"], "Only sha1_git is supported." # HACK: sha1_git really + ) return sha1_git_bin def _check_directory_exists(sha1_git, sha1_git_bin): if len(list(storage.directory_missing([sha1_git_bin]))): - raise NotFoundExc('Directory with sha1_git %s not found' % sha1_git) + raise NotFoundExc("Directory with sha1_git %s not found" % sha1_git) def lookup_directory(sha1_git): @@ -382,7 +379,7 @@ directory information as dict. 
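# Sketch of the swh-storage search fallback above: with fewer than 7 search
# terms the url pattern becomes an alternation over every permutation of the
# words, so term order does not matter; with more terms (5040+ permutations)
# it degrades to joining the words in the given order. Sample terms are
# illustrative.
import itertools
import re

words = [re.escape(word) for word in "github python".split()]
pattern = "|".join(".*".join(permut) for permut in itertools.permutations(words))
# pattern == "github.*python|python.*github"
assert re.search(pattern, "https://github.com/some/python-project")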
""" - empty_dir_sha1 = '4b825dc642cb6eb9a060e54bf8d69288fbee4904' + empty_dir_sha1 = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" if sha1_git == empty_dir_sha1: return [] @@ -414,11 +411,13 @@ paths = path_string.strip(os.path.sep).split(os.path.sep) queried_dir = storage.directory_entry_get_by_path( - sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths))) + sha1_git_bin, list(map(lambda p: p.encode("utf-8"), paths)) + ) if not queried_dir: - raise NotFoundExc(('Directory entry with path %s from %s not found') % - (path_string, sha1_git)) + raise NotFoundExc( + ("Directory entry with path %s from %s not found") % (path_string, sha1_git) + ) return converters.from_directory_entry(queried_dir) @@ -439,8 +438,7 @@ sha1_git_bin = _to_sha1_bin(release_sha1_git) release = _first_element(storage.release_get([sha1_git_bin])) if not release: - raise NotFoundExc('Release with sha1_git %s not found.' - % release_sha1_git) + raise NotFoundExc("Release with sha1_git %s not found." % release_sha1_git) return converters.from_release(release) @@ -480,8 +478,7 @@ sha1_git_bin = _to_sha1_bin(rev_sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: - raise NotFoundExc('Revision with sha1_git %s not found.' - % rev_sha1_git) + raise NotFoundExc("Revision with sha1_git %s not found." % rev_sha1_git) return converters.from_revision(revision) @@ -522,52 +519,50 @@ revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: - raise NotFoundExc('Revision with sha1_git %s not found.' - % rev_sha1_git) - if 'message' not in revision: - raise NotFoundExc('No message for revision with sha1_git %s.' - % rev_sha1_git) - res = {'message': revision['message']} + raise NotFoundExc("Revision with sha1_git %s not found." % rev_sha1_git) + if "message" not in revision: + raise NotFoundExc("No message for revision with sha1_git %s." % rev_sha1_git) + res = {"message": revision["message"]} return res def _lookup_revision_id_by(origin, branch_name, timestamp): def _get_snapshot_branch(snapshot, branch_name): - snapshot = lookup_snapshot(visit['snapshot'], - branches_from=branch_name, - branches_count=10) + snapshot = lookup_snapshot( + visit["snapshot"], branches_from=branch_name, branches_count=10 + ) branch = None - if branch_name in snapshot['branches']: - branch = snapshot['branches'][branch_name] + if branch_name in snapshot["branches"]: + branch = snapshot["branches"][branch_name] return branch if isinstance(origin, int): - origin = {'id': origin} + origin = {"id": origin} elif isinstance(origin, str): - origin = {'url': origin} + origin = {"url": origin} else: raise TypeError('"origin" must be an int or a string.') visit = get_origin_visit(origin, visit_ts=timestamp) - branch = _get_snapshot_branch(visit['snapshot'], branch_name) + branch = _get_snapshot_branch(visit["snapshot"], branch_name) rev_id = None - if branch and branch['target_type'] == 'revision': - rev_id = branch['target'] - elif branch and branch['target_type'] == 'alias': - branch = _get_snapshot_branch(visit['snapshot'], branch['target']) - if branch and branch['target_type'] == 'revision': - rev_id = branch['target'] + if branch and branch["target_type"] == "revision": + rev_id = branch["target"] + elif branch and branch["target_type"] == "alias": + branch = _get_snapshot_branch(visit["snapshot"], branch["target"]) + if branch and branch["target_type"] == "revision": + rev_id = branch["target"] if not rev_id: - raise NotFoundExc('Revision for origin %s and branch %s not found.' 
- % (origin.get('url'), branch_name)) + raise NotFoundExc( + "Revision for origin %s and branch %s not found." + % (origin.get("url"), branch_name) + ) return rev_id -def lookup_revision_by(origin, - branch_name='HEAD', - timestamp=None): +def lookup_revision_by(origin, branch_name="HEAD", timestamp=None): """Lookup revision by origin, snapshot branch name and visit timestamp. If branch_name is not provided, lookup using 'HEAD' as default. @@ -632,8 +627,9 @@ return lookup_revision_log(rev_id, limit) -def lookup_revision_with_context_by(origin, branch_name, timestamp, - sha1_git, limit=100): +def lookup_revision_with_context_by( + origin, branch_name, timestamp, sha1_git, limit=100 +): """Return information about revision sha1_git, limited to the sub-graph of all transitive parents of sha1_git_root. sha1_git_root being resolved through the lookup of a revision by origin, @@ -665,8 +661,10 @@ rev_root = _first_element(storage.revision_get([rev_root_id_bin])) - return (converters.from_revision(rev_root), - lookup_revision_with_context(rev_root, sha1_git, limit)) + return ( + converters.from_revision(rev_root), + lookup_revision_with_context(rev_root, sha1_git, limit), + ) def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100): @@ -695,16 +693,16 @@ revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: - raise NotFoundExc('Revision %s not found' % sha1_git) + raise NotFoundExc("Revision %s not found" % sha1_git) if isinstance(sha1_git_root, str): sha1_git_root_bin = _to_sha1_bin(sha1_git_root) - revision_root = _first_element(storage.revision_get([sha1_git_root_bin])) # noqa + revision_root = _first_element(storage.revision_get([sha1_git_root_bin])) if not revision_root: - raise NotFoundExc('Revision root %s not found' % sha1_git_root) + raise NotFoundExc("Revision root %s not found" % sha1_git_root) else: - sha1_git_root_bin = sha1_git_root['id'] + sha1_git_root_bin = sha1_git_root["id"] revision_log = storage.revision_log([sha1_git_root_bin], limit) @@ -712,17 +710,18 @@ children = defaultdict(list) for rev in revision_log: - rev_id = rev['id'] + rev_id = rev["id"] parents[rev_id] = [] - for parent_id in rev['parents']: + for parent_id in rev["parents"]: parents[rev_id].append(parent_id) children[parent_id].append(rev_id) - if revision['id'] not in parents: - raise NotFoundExc('Revision %s is not an ancestor of %s' % - (sha1_git, sha1_git_root)) + if revision["id"] not in parents: + raise NotFoundExc( + "Revision %s is not an ancestor of %s" % (sha1_git, sha1_git_root) + ) - revision['children'] = children[revision['id']] + revision["children"] = children[revision["id"]] return converters.from_revision(revision) @@ -752,47 +751,51 @@ sha1_git_bin = _to_sha1_bin(sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: - raise NotFoundExc('Revision %s not found' % sha1_git) - dir_sha1_git_bin = revision['directory'] + raise NotFoundExc("Revision %s not found" % sha1_git) + dir_sha1_git_bin = revision["directory"] if dir_path: paths = dir_path.strip(os.path.sep).split(os.path.sep) entity = storage.directory_entry_get_by_path( - dir_sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths))) + dir_sha1_git_bin, list(map(lambda p: p.encode("utf-8"), paths)) + ) if not entity: raise NotFoundExc( "Directory or File '%s' pointed to by revision %s not found" - % (dir_path, sha1_git)) + % (dir_path, sha1_git) + ) else: - entity = {'type': 'dir', 'target': dir_sha1_git_bin} - if entity['type'] == 'dir': - directory_entries = 
storage.directory_ls(entity['target']) or [] - return {'type': 'dir', - 'path': '.' if not dir_path else dir_path, - 'revision': sha1_git, - 'content': list(map(converters.from_directory_entry, - directory_entries))} - elif entity['type'] == 'file': # content - content = _first_element( - storage.content_find({'sha1_git': entity['target']})) + entity = {"type": "dir", "target": dir_sha1_git_bin} + if entity["type"] == "dir": + directory_entries = storage.directory_ls(entity["target"]) or [] + return { + "type": "dir", + "path": "." if not dir_path else dir_path, + "revision": sha1_git, + "content": list(map(converters.from_directory_entry, directory_entries)), + } + elif entity["type"] == "file": # content + content = _first_element(storage.content_find({"sha1_git": entity["target"]})) if not content: - raise NotFoundExc('Content not found for revision %s' - % sha1_git) + raise NotFoundExc("Content not found for revision %s" % sha1_git) if with_data: - c = _first_element(storage.content_get([content['sha1']])) - content['data'] = c['data'] - return {'type': 'file', - 'path': '.' if not dir_path else dir_path, - 'revision': sha1_git, - 'content': converters.from_content(content)} - elif entity['type'] == 'rev': # revision - revision = next(storage.revision_get([entity['target']])) - return {'type': 'rev', - 'path': '.' if not dir_path else dir_path, - 'revision': sha1_git, - 'content': converters.from_revision(revision)} + c = _first_element(storage.content_get([content["sha1"]])) + content["data"] = c["data"] + return { + "type": "file", + "path": "." if not dir_path else dir_path, + "revision": sha1_git, + "content": converters.from_content(content), + } + elif entity["type"] == "rev": # revision + revision = next(storage.revision_get([entity["target"]])) + return { + "type": "rev", + "path": "." if not dir_path else dir_path, + "revision": sha1_git, + "content": converters.from_revision(revision), + } else: - raise NotImplementedError('Entity of type %s not implemented.' - % entity['type']) + raise NotImplementedError("Entity of type %s not implemented." % entity["type"]) def lookup_content(q): @@ -808,8 +811,10 @@ algo, hash = query.parse_hash(q) c = _first_element(storage.content_find({algo: hash})) if not c: - raise NotFoundExc('Content with %s checksum equals to %s not found!' % - (algo, hashutil.hash_to_hex(hash))) + raise NotFoundExc( + "Content with %s checksum equals to %s not found!" + % (algo, hashutil.hash_to_hex(hash)) + ) return converters.from_content(c) @@ -829,13 +834,14 @@ """ c = lookup_content(q) - content_sha1_bytes = hashutil.hash_to_bytes(c['checksums']['sha1']) + content_sha1_bytes = hashutil.hash_to_bytes(c["checksums"]["sha1"]) content = _first_element(storage.content_get([content_sha1_bytes])) if not content: algo, hash = query.parse_hash(q) - raise NotFoundExc('Bytes of content with %s checksum equals to %s ' - 'are not available!' % - (algo, hashutil.hash_to_hex(hash))) + raise NotFoundExc( + "Bytes of content with %s checksum equals to %s " + "are not available!" 
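# Usage sketch for lookup_content above: the query string is "algo:hex" (or a
# bare sha1), and NotFoundExc signals a checksum absent from the archive. The
# sha1_git value is an illustrative placeholder, not a real checksum.
from swh.web.common.exc import NotFoundExc
from swh.web.common.service import lookup_content

try:
    content = lookup_content("sha1_git:" + "00" * 20)
    print(content["checksums"]["sha1"])
except NotFoundExc as exc:
    print(exc)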
% (algo, hashutil.hash_to_hex(hash)) + ) return converters.from_content(content) @@ -862,8 +868,9 @@ """ limit = min(limit, MAX_LIMIT) for visit in storage.origin_visit_get( - origin_url, last_visit=last_visit, limit=limit): - visit['origin'] = origin_url + origin_url, last_visit=last_visit, limit=limit + ): + visit["origin"] = origin_url yield visit @@ -877,8 +884,7 @@ Dictionaries of origin_visit for that origin """ - visits = _lookup_origin_visits(origin, last_visit=last_visit, - limit=per_page) + visits = _lookup_origin_visits(origin, last_visit=last_visit, limit=per_page) for visit in visits: yield converters.from_origin_visit(visit) @@ -895,7 +901,8 @@ """ visit = storage.origin_visit_get_latest( - origin_url, require_snapshot=require_snapshot) + origin_url, require_snapshot=require_snapshot + ) return converters.from_origin_visit(visit) @@ -912,9 +919,10 @@ """ visit = storage.origin_visit_get_by(origin_url, visit_id) if not visit: - raise NotFoundExc('Origin %s or its visit ' - 'with id %s not found!' % (origin_url, visit_id)) - visit['origin'] = origin_url + raise NotFoundExc( + "Origin %s or its visit " "with id %s not found!" % (origin_url, visit_id) + ) + visit["origin"] = origin_url return converters.from_origin_visit(visit) @@ -930,29 +938,32 @@ """ snapshot_id_bin = _to_sha1_bin(snapshot_id) snapshot_sizes = storage.snapshot_count_branches(snapshot_id_bin) - if 'revision' not in snapshot_sizes: - snapshot_sizes['revision'] = 0 - if 'release' not in snapshot_sizes: - snapshot_sizes['release'] = 0 + if "revision" not in snapshot_sizes: + snapshot_sizes["revision"] = 0 + if "release" not in snapshot_sizes: + snapshot_sizes["release"] = 0 # adjust revision / release count for display if aliases are defined - if 'alias' in snapshot_sizes: - aliases = lookup_snapshot(snapshot_id, - branches_count=snapshot_sizes['alias'], - target_types=['alias']) - for alias in aliases['branches'].values(): - if lookup_snapshot(snapshot_id, - branches_from=alias['target'], - branches_count=1, - target_types=['revision']): - snapshot_sizes['revision'] += 1 + if "alias" in snapshot_sizes: + aliases = lookup_snapshot( + snapshot_id, branches_count=snapshot_sizes["alias"], target_types=["alias"] + ) + for alias in aliases["branches"].values(): + if lookup_snapshot( + snapshot_id, + branches_from=alias["target"], + branches_count=1, + target_types=["revision"], + ): + snapshot_sizes["revision"] += 1 else: - snapshot_sizes['release'] += 1 - del snapshot_sizes['alias'] + snapshot_sizes["release"] += 1 + del snapshot_sizes["alias"] return snapshot_sizes -def lookup_snapshot(snapshot_id, branches_from='', branches_count=1000, - target_types=None): +def lookup_snapshot( + snapshot_id, branches_from="", branches_count=1000, target_types=None +): """Return information about a snapshot, aka the list of named branches found during a specific visit of an origin. @@ -971,11 +982,11 @@ A dict filled with the snapshot content. """ snapshot_id_bin = _to_sha1_bin(snapshot_id) - snapshot = storage.snapshot_get_branches(snapshot_id_bin, - branches_from.encode(), - branches_count, target_types) + snapshot = storage.snapshot_get_branches( + snapshot_id_bin, branches_from.encode(), branches_count, target_types + ) if not snapshot: - raise NotFoundExc('Snapshot with id %s not found!' % snapshot_id) + raise NotFoundExc("Snapshot with id %s not found!" 
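# Usage sketch for lookup_snapshot above: branches_from / branches_count page
# through a possibly huge branch list, and target_types restricts the results,
# the same mechanism lookup_snapshot_sizes uses to classify aliased branches.
# The snapshot id is an illustrative placeholder.
from swh.web.common.service import lookup_snapshot

snp = lookup_snapshot(
    "0123456789abcdef0123456789abcdef01234567",
    branches_from="refs/tags/",
    branches_count=10,
    target_types=["release"],
)
for name, branch in snp["branches"].items():
    if branch:  # dangling branches can be None
        print(name, branch["target_type"], branch["target"])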
% snapshot_id) return converters.from_snapshot(snapshot) @@ -1015,37 +1026,34 @@ """ if ( - 'origin_url' in revision and - 'branch_name' in revision and - 'ts' in revision and - 'sha1_git' in revision): - return lookup_revision_with_context_by(revision['origin_url'], - revision['branch_name'], - revision['ts'], - revision['sha1_git'], - limit) - if ( - 'origin_url' in revision and - 'branch_name' in revision and - 'ts' in revision): - return lookup_revision_by(revision['origin_url'], - revision['branch_name'], - revision['ts']) - if ( - 'sha1_git_root' in revision and - 'sha1_git' in revision): - return lookup_revision_with_context(revision['sha1_git_root'], - revision['sha1_git'], - limit) - if 'sha1_git' in revision: - return lookup_revision(revision['sha1_git']) + "origin_url" in revision + and "branch_name" in revision + and "ts" in revision + and "sha1_git" in revision + ): + return lookup_revision_with_context_by( + revision["origin_url"], + revision["branch_name"], + revision["ts"], + revision["sha1_git"], + limit, + ) + if "origin_url" in revision and "branch_name" in revision and "ts" in revision: + return lookup_revision_by( + revision["origin_url"], revision["branch_name"], revision["ts"] + ) + if "sha1_git_root" in revision and "sha1_git" in revision: + return lookup_revision_with_context( + revision["sha1_git_root"], revision["sha1_git"], limit + ) + if "sha1_git" in revision: + return lookup_revision(revision["sha1_git"]) # this should not happen - raise NotImplementedError('Should not happen!') + raise NotImplementedError("Should not happen!") -def lookup_directory_through_revision(revision, path=None, - limit=100, with_data=False): +def lookup_directory_through_revision(revision, path=None, limit=100, with_data=False): """Retrieve the directory information from the revision. Args: @@ -1064,9 +1072,8 @@ rev = lookup_revision_through(revision, limit) if not rev: - raise NotFoundExc('Revision with criterion %s not found!' % revision) - return (rev['id'], - lookup_directory_with_revision(rev['id'], path, with_data)) + raise NotFoundExc("Revision with criterion %s not found!" % revision) + return (rev["id"], lookup_directory_with_revision(rev["id"], path, with_data)) def vault_cook(obj_type, obj_id, email=None): @@ -1093,16 +1100,15 @@ """ rev_sha1_git_bin = _to_sha1_bin(rev_id) - changes = diff.diff_revision(storage, rev_sha1_git_bin, - track_renaming=True) + changes = diff.diff_revision(storage, rev_sha1_git_bin, track_renaming=True) for change in changes: - change['from'] = converters.from_directory_entry(change['from']) - change['to'] = converters.from_directory_entry(change['to']) - if change['from_path']: - change['from_path'] = change['from_path'].decode('utf-8') - if change['to_path']: - change['to_path'] = change['to_path'].decode('utf-8') + change["from"] = converters.from_directory_entry(change["from"]) + change["to"] = converters.from_directory_entry(change["to"]) + if change["from_path"]: + change["from_path"] = change["from_path"].decode("utf-8") + if change["to_path"]: + change["to_path"] = change["to_path"].decode("utf-8") return changes @@ -1112,13 +1118,12 @@ Proxy class wrapping a revisions walker iterator from swh-storage and performing needed conversions. 
""" + def __init__(self, rev_walker_type, rev_start, *args, **kwargs): rev_start_bin = hashutil.hash_to_bytes(rev_start) - self.revisions_walker = \ - revisions_walker.get_revisions_walker(rev_walker_type, - storage, - rev_start_bin, - *args, **kwargs) + self.revisions_walker = revisions_walker.get_revisions_walker( + rev_walker_type, storage, rev_start_bin, *args, **kwargs + ) def export_state(self): return self.revisions_walker.export_state() @@ -1172,12 +1177,9 @@ BadInputExc: if the object identifier is invalid """ if object_type == CONTENT: - return lookup_content(f'sha1_git:{object_id}') + return lookup_content(f"sha1_git:{object_id}") elif object_type == DIRECTORY: - return { - 'id': object_id, - 'content': list(lookup_directory(object_id)) - } + return {"id": object_id, "content": list(lookup_directory(object_id))} elif object_type == RELEASE: return lookup_release(object_id) elif object_type == REVISION: @@ -1185,9 +1187,13 @@ elif object_type == SNAPSHOT: return lookup_snapshot(object_id) - raise BadInputExc(('Invalid swh object type! Valid types are ' - f'{CONTENT}, {DIRECTORY}, {RELEASE} ' - f'{REVISION} or {SNAPSHOT}.')) + raise BadInputExc( + ( + "Invalid swh object type! Valid types are " + f"{CONTENT}, {DIRECTORY}, {RELEASE} " + f"{REVISION} or {SNAPSHOT}." + ) + ) def lookup_missing_hashes(grouped_pids: Dict[str, List[bytes]]) -> Set[str]: @@ -1205,8 +1211,7 @@ for obj_type, obj_ids in grouped_pids.items(): if obj_type == CONTENT: - missing_hashes.append( - storage.content_missing_per_sha1_git(obj_ids)) + missing_hashes.append(storage.content_missing_per_sha1_git(obj_ids)) elif obj_type == DIRECTORY: missing_hashes.append(storage.directory_missing(obj_ids)) elif obj_type == REVISION: @@ -1216,7 +1221,8 @@ elif obj_type == SNAPSHOT: missing_hashes.append(storage.directory_missing(obj_ids)) - missing = set(map(lambda x: hashutil.hash_to_hex(x), - itertools.chain(*missing_hashes))) + missing = set( + map(lambda x: hashutil.hash_to_hex(x), itertools.chain(*missing_hashes)) + ) return missing diff --git a/swh/web/common/swh_templatetags.py b/swh/web/common/swh_templatetags.py --- a/swh/web/common/swh_templatetags.py +++ b/swh/web/common/swh_templatetags.py @@ -42,9 +42,8 @@ """ try: if 'href="' not in text: - text = re.sub(r'(http.*)', r'\1', text) - return re.sub(r'([^ <>"]+@[^ <>"]+)', - r'\1', text) + text = re.sub(r"(http.*)", r'\1', text) + return re.sub(r'([^ <>"]+@[^ <>"]+)', r'\1', text) except Exception as exc: sentry_sdk.capture_exception(exc) @@ -63,13 +62,13 @@ The text as is otherwise. 
""" - links = text.split(',') - ret = '' + links = text.split(",") + ret = "" for i, link in enumerate(links): - ret += re.sub(r'<(http.*)>', r'<\1>', link) + ret += re.sub(r"<(http.*)>", r'<\1>', link) # add one link per line and align them if i != len(links) - 1: - ret += '\n ' + ret += "\n " return ret diff --git a/swh/web/common/urlsindex.py b/swh/web/common/urlsindex.py --- a/swh/web/common/urlsindex.py +++ b/swh/web/common/urlsindex.py @@ -21,7 +21,7 @@ """ _urlpatterns = {} # type: Dict[str, List[django.urls.URLPattern]] - scope = 'default' + scope = "default" @classmethod def add_url_pattern(cls, url_pattern, view, view_name=None): @@ -36,14 +36,12 @@ if cls.scope not in cls._urlpatterns: cls._urlpatterns[cls.scope] = [] if view_name: - cls._urlpatterns[cls.scope].append(url(url_pattern, view, - name=view_name)) + cls._urlpatterns[cls.scope].append(url(url_pattern, view, name=view_name)) else: cls._urlpatterns[cls.scope].append(url(url_pattern, view)) @classmethod - def add_redirect_for_checksum_args(cls, view_name, url_patterns, - checksum_args): + def add_redirect_for_checksum_args(cls, view_name, url_patterns, checksum_args): """ Class method that redirects to view with lowercase checksums when upper/mixed case checksums are passed as url arguments. @@ -54,10 +52,9 @@ checksum_args (List[str]): url argument names corresponding to checksum values """ - new_view_name = view_name+'-uppercase-checksum' + new_view_name = view_name + "-uppercase-checksum" for url_pattern in url_patterns: - url_pattern_upper = url_pattern.replace('[0-9a-f]', - '[0-9a-fA-F]') + url_pattern_upper = url_pattern.replace("[0-9a-f]", "[0-9a-fA-F]") def view_redirect(request, *args, **kwargs): for checksum_arg in checksum_args: @@ -65,8 +62,7 @@ kwargs[checksum_arg] = checksum_upper.lower() return redirect(view_name, *args, **kwargs) - cls.add_url_pattern(url_pattern_upper, view_redirect, - new_view_name) + cls.add_url_pattern(url_pattern_upper, view_redirect, new_view_name) @classmethod def get_url_patterns(cls): diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -34,26 +34,28 @@ SWH_WEB_METRICS_REGISTRY = CollectorRegistry(auto_describe=True) swh_object_icons = { - 'branch': 'fa fa-code-fork', - 'branches': 'fa fa-code-fork', - 'content': 'fa fa-file-text', - 'directory': 'fa fa-folder', - 'person': 'fa fa-user', - 'revisions history': 'fa fa-history', - 'release': 'fa fa-tag', - 'releases': 'fa fa-tag', - 'revision': 'octicon-git-commit', - 'snapshot': 'fa fa-camera', - 'visits': 'fa fa-calendar', + "branch": "fa fa-code-fork", + "branches": "fa fa-code-fork", + "content": "fa fa-file-text", + "directory": "fa fa-folder", + "person": "fa fa-user", + "revisions history": "fa fa-history", + "release": "fa fa-tag", + "releases": "fa fa-tag", + "revision": "octicon-git-commit", + "snapshot": "fa fa-camera", + "visits": "fa fa-calendar", } -def reverse(viewname: str, - url_args: Optional[Dict[str, Any]] = None, - query_params: Optional[QueryParameters] = None, - current_app: Optional[str] = None, - urlconf: Optional[str] = None, - request: Optional[HttpRequest] = None) -> str: +def reverse( + viewname: str, + url_args: Optional[Dict[str, Any]] = None, + query_params: Optional[QueryParameters] = None, + current_app: Optional[str] = None, + urlconf: Optional[str] = None, + request: Optional[HttpRequest] = None, +) -> str: """An override of django reverse function supporting query parameters. 
Args: @@ -73,17 +75,18 @@ if url_args: url_args = {k: v for k, v in url_args.items() if v is not None} - url = django_reverse(viewname, urlconf=urlconf, kwargs=url_args, - current_app=current_app) + url = django_reverse( + viewname, urlconf=urlconf, kwargs=url_args, current_app=current_app + ) if query_params: query_params = {k: v for k, v in query_params.items() if v} if query_params and len(query_params) > 0: - query_dict = QueryDict('', mutable=True) + query_dict = QueryDict("", mutable=True) for k in sorted(query_params.keys()): query_dict[k] = query_params[k] - url += ('?' + query_dict.urlencode(safe='/;:')) + url += "?" + query_dict.urlencode(safe="/;:") if request is not None: url = request.build_absolute_uri(url) @@ -101,7 +104,7 @@ datetime.datetime: datetime in UTC without timezone info """ if date.tzinfo: - return date.astimezone(tz.gettz('UTC')).replace(tzinfo=timezone.utc) + return date.astimezone(tz.gettz("UTC")).replace(tzinfo=timezone.utc) else: return date @@ -129,7 +132,8 @@ except Exception: try: return datetime.utcfromtimestamp(float(timestamp)).replace( - tzinfo=timezone.utc) + tzinfo=timezone.utc + ) except (ValueError, OverflowError) as e: raise BadInputExc(e) @@ -138,14 +142,14 @@ """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" - sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' - sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' + sha256_re = r"([0-9a-f]{8})[0-9a-z]{56}" + sha1_re = r"([0-9a-f]{8})[0-9a-f]{32}" - ret = re.sub(sha256_re, r'\1...', path) - return re.sub(sha1_re, r'\1...', ret) + ret = re.sub(sha256_re, r"\1...", path) + return re.sub(sha1_re, r"\1...", ret) -def format_utc_iso_date(iso_date, fmt='%d %B %Y, %H:%M UTC'): +def format_utc_iso_date(iso_date, fmt="%d %B %Y, %H:%M UTC"): """Turns a string representation of an ISO 8601 date string to UTC and format it into a more human readable one. @@ -188,12 +192,11 @@ """ path_info = [] if path: - sub_paths = path.strip('/').split('/') - path_from_root = '' + sub_paths = path.strip("/").split("/") + path_from_root = "" for p in sub_paths: - path_from_root += '/' + p - path_info.append({'name': p, - 'path': path_from_root.strip('/')}) + path_from_root += "/" + p + path_info.append({"name": p, "path": path_from_root.strip("/")}) return path_info @@ -212,9 +215,10 @@ parser = docutils.parsers.rst.Parser() components = (docutils.parsers.rst.Parser,) settings = docutils.frontend.OptionParser( - components=components).get_default_values() + components=components + ).get_default_values() settings.report_level = report_level - document = docutils.utils.new_document('rst-doc', settings=settings) + document = docutils.utils.new_document("rst-doc", settings=settings) parser.parse(text, document) return document @@ -229,11 +233,11 @@ Returns: str: The client IP address """ - x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') + x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR") if x_forwarded_for: - ip = x_forwarded_for.split(',')[0] + ip = x_forwarded_for.split(",")[0] else: - ip = request.META.get('REMOTE_ADDR') + ip = request.META.get("REMOTE_ADDR") return ip @@ -243,15 +247,15 @@ in all swh-web templates. """ config = get_config() - if request.user.is_authenticated and not hasattr(request.user, 'backend'): + if request.user.is_authenticated and not hasattr(request.user, "backend"): # To avoid django.template.base.VariableDoesNotExist errors # when rendering templates when standard Django user is logged in. 
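# Usage sketch for the reverse() override above: query parameters with falsy
# values are dropped, the rest are urlencoded in sorted key order and appended
# to the resolved route. The view name and arguments are illustrative, not a
# guaranteed part of the routing table.
from swh.web.common.utils import reverse

url = reverse(
    "browse-origin-directory",
    url_args={"origin_url": "https://github.com/python/cpython"},
    query_params={"path": "Lib", "revision": None},  # revision is dropped
)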
- request.user.backend = 'django.contrib.auth.backends.ModelBackend' + request.user.backend = "django.contrib.auth.backends.ModelBackend" return { - 'swh_object_icons': swh_object_icons, - 'available_languages': None, - 'swh_client_config': config['client_config'], - 'oidc_enabled': bool(config['keycloak']['server_url']), + "swh_object_icons": swh_object_icons, + "available_languages": None, + "swh_client_config": config["client_config"], + "oidc_enabled": bool(config["keycloak"]["server_url"]), } @@ -262,14 +266,14 @@ """ def authenticate(self, request): - user = getattr(request._request, 'user', None) + user = getattr(request._request, "user", None) self.enforce_csrf(request) return (user, None) -def resolve_branch_alias(snapshot: Dict[str, Any], - branch: Optional[Dict[str, Any]] - ) -> Optional[Dict[str, Any]]: +def resolve_branch_alias( + snapshot: Dict[str, Any], branch: Optional[Dict[str, Any]] +) -> Optional[Dict[str, Any]]: """ Resolve branch alias in snapshot content. @@ -279,16 +283,17 @@ Returns: The real snapshot branch that got aliased. """ - while branch and branch['target_type'] == 'alias': - if branch['target'] in snapshot['branches']: - branch = snapshot['branches'][branch['target']] + while branch and branch["target_type"] == "alias": + if branch["target"] in snapshot["branches"]: + branch = snapshot["branches"][branch["target"]] else: from swh.web.common import service + snp = service.lookup_snapshot( - snapshot['id'], branches_from=branch['target'], - branches_count=1) - if snp and branch['target'] in snp['branches']: - branch = snp['branches'][branch['target']] + snapshot["id"], branches_from=branch["target"], branches_count=1 + ) + if snp and branch["target"] in snp["branches"]: + branch = snp["branches"][branch["target"]] else: branch = None return branch @@ -322,10 +327,9 @@ """ settings = { - 'initial_header_level': 2, + "initial_header_level": 2, } - pp = publish_parts(rst, writer=_HTML_WRITER, - settings_overrides=settings) + pp = publish_parts(rst, writer=_HTML_WRITER, settings_overrides=settings) return f'
{pp["html_body"]}
' @@ -339,4 +343,4 @@ Returns: The prettified HTML document """ - return BeautifulSoup(html, 'lxml').prettify() + return BeautifulSoup(html, "lxml").prettify() diff --git a/swh/web/config.py b/swh/web/config.py --- a/swh/web/config.py +++ b/swh/web/config.py @@ -18,108 +18,85 @@ SETTINGS_DIR = os.path.dirname(settings.__file__) DEFAULT_CONFIG = { - 'allowed_hosts': ('list', []), - 'search': ('dict', { - 'cls': 'remote', - 'args': { - 'url': 'http://127.0.0.1:5010/', - 'timeout': 10, - }, - }), - 'storage': ('dict', { - 'cls': 'remote', - 'url': 'http://127.0.0.1:5002/', - 'timeout': 10, - }), - 'indexer_storage': ('dict', { - 'cls': 'remote', - 'args': { - 'url': 'http://127.0.0.1:5007/', - 'timeout': 1, - } - }), - 'log_dir': ('string', '/tmp/swh/log'), - 'debug': ('bool', False), - 'serve_assets': ('bool', False), - 'host': ('string', '127.0.0.1'), - 'port': ('int', 5004), - 'secret_key': ('string', 'development key'), + "allowed_hosts": ("list", []), + "search": ( + "dict", + {"cls": "remote", "args": {"url": "http://127.0.0.1:5010/", "timeout": 10,},}, + ), + "storage": ( + "dict", + {"cls": "remote", "url": "http://127.0.0.1:5002/", "timeout": 10,}, + ), + "indexer_storage": ( + "dict", + {"cls": "remote", "args": {"url": "http://127.0.0.1:5007/", "timeout": 1,}}, + ), + "log_dir": ("string", "/tmp/swh/log"), + "debug": ("bool", False), + "serve_assets": ("bool", False), + "host": ("string", "127.0.0.1"), + "port": ("int", 5004), + "secret_key": ("string", "development key"), # do not display code highlighting for content > 1MB - 'content_display_max_size': ('int', 5 * 1024 * 1024), - 'snapshot_content_max_size': ('int', 1000), - 'throttling': ('dict', { - 'cache_uri': None, # production: memcached as cache (127.0.0.1:11211) - # development: in-memory cache so None - 'scopes': { - 'swh_api': { - 'limiter_rate': { - 'default': '120/h' + "content_display_max_size": ("int", 5 * 1024 * 1024), + "snapshot_content_max_size": ("int", 1000), + "throttling": ( + "dict", + { + "cache_uri": None, # production: memcached as cache (127.0.0.1:11211) + # development: in-memory cache so None + "scopes": { + "swh_api": { + "limiter_rate": {"default": "120/h"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_api_origin_search': { - 'limiter_rate': { - 'default': '10/m' + "swh_api_origin_search": { + "limiter_rate": {"default": "10/m"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_vault_cooking': { - 'limiter_rate': { - 'default': '120/h', - 'GET': '60/m' + "swh_vault_cooking": { + "limiter_rate": {"default": "120/h", "GET": "60/m"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_save_origin': { - 'limiter_rate': { - 'default': '120/h', - 'POST': '10/h' + "swh_save_origin": { + "limiter_rate": {"default": "120/h", "POST": "10/h"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_api_origin_visit_latest': { - 'limiter_rate': { - 'default': '700/m' + "swh_api_origin_visit_latest": { + "limiter_rate": {"default": "700/m"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'], }, - } - }), - 'vault': ('dict', { - 'cls': 'remote', - 'args': { - 'url': 'http://127.0.0.1:5005/', - } - }), - 'scheduler': ('dict', { - 'cls': 'remote', - 'args': { - 'url': 'http://127.0.0.1:5008/' - } - }), - 'development_db': ('string', os.path.join(SETTINGS_DIR, 'db.sqlite3')), - 'test_db': ('string', 
os.path.join(SETTINGS_DIR, 'testdb.sqlite3')), - 'production_db': ('string', '/var/lib/swh/web.sqlite3'), - 'deposit': ('dict', { - 'private_api_url': 'https://deposit.softwareheritage.org/1/private/', - 'private_api_user': 'swhworker', - 'private_api_password': '' - }), - 'coverage_count_origins': ('bool', False), - 'e2e_tests_mode': ('bool', False), - 'es_workers_index_url': ('string', ''), - 'history_counters_url': ('string', 'https://stats.export.softwareheritage.org/history_counters.json'), # noqa - 'client_config': ('dict', {}), - 'keycloak': ('dict', { - 'server_url': '', - 'realm_name': '' - }), + }, + ), + "vault": ("dict", {"cls": "remote", "args": {"url": "http://127.0.0.1:5005/",}}), + "scheduler": ("dict", {"cls": "remote", "args": {"url": "http://127.0.0.1:5008/"}}), + "development_db": ("string", os.path.join(SETTINGS_DIR, "db.sqlite3")), + "test_db": ("string", os.path.join(SETTINGS_DIR, "testdb.sqlite3")), + "production_db": ("string", "/var/lib/swh/web.sqlite3"), + "deposit": ( + "dict", + { + "private_api_url": "https://deposit.softwareheritage.org/1/private/", + "private_api_user": "swhworker", + "private_api_password": "", + }, + ), + "coverage_count_origins": ("bool", False), + "e2e_tests_mode": ("bool", False), + "es_workers_index_url": ("string", ""), + "history_counters_url": ( + "string", + "https://stats.export.softwareheritage.org/history_counters.json", + ), + "client_config": ("dict", {}), + "keycloak": ("dict", {"server_url": "", "realm_name": ""}), } swhweb_config = {} # type: Dict[str, Any] -def get_config(config_file='web/web'): +def get_config(config_file="web/web"): """Read the configuration file `config_file`. If an environment variable SWH_CONFIG_FILENAME is defined, this @@ -134,22 +111,22 @@ """ if not swhweb_config: - config_filename = os.environ.get('SWH_CONFIG_FILENAME') + config_filename = os.environ.get("SWH_CONFIG_FILENAME") if config_filename: config_file = config_filename cfg = config.load_named_config(config_file, DEFAULT_CONFIG) swhweb_config.update(cfg) - config.prepare_folders(swhweb_config, 'log_dir') - if swhweb_config.get('search'): - swhweb_config['search'] = get_search(**swhweb_config['search']) + config.prepare_folders(swhweb_config, "log_dir") + if swhweb_config.get("search"): + swhweb_config["search"] = get_search(**swhweb_config["search"]) else: - swhweb_config['search'] = None - swhweb_config['storage'] = get_storage(**swhweb_config['storage']) - swhweb_config['vault'] = get_vault(**swhweb_config['vault']) - swhweb_config['indexer_storage'] = \ - get_indexer_storage(**swhweb_config['indexer_storage']) - swhweb_config['scheduler'] = get_scheduler( - **swhweb_config['scheduler']) + swhweb_config["search"] = None + swhweb_config["storage"] = get_storage(**swhweb_config["storage"]) + swhweb_config["vault"] = get_vault(**swhweb_config["vault"]) + swhweb_config["indexer_storage"] = get_indexer_storage( + **swhweb_config["indexer_storage"] + ) + swhweb_config["scheduler"] = get_scheduler(**swhweb_config["scheduler"]) return swhweb_config @@ -157,32 +134,32 @@ """Return the current application's search. """ - return get_config()['search'] + return get_config()["search"] def storage(): """Return the current application's storage. """ - return get_config()['storage'] + return get_config()["storage"] def vault(): """Return the current application's vault. """ - return get_config()['vault'] + return get_config()["vault"] def indexer_storage(): """Return the current application's indexer storage. 
""" - return get_config()['indexer_storage'] + return get_config()["indexer_storage"] def scheduler(): """Return the current application's scheduler. """ - return get_config()['scheduler'] + return get_config()["scheduler"] diff --git a/swh/web/doc_config.py b/swh/web/doc_config.py --- a/swh/web/doc_config.py +++ b/swh/web/doc_config.py @@ -12,7 +12,7 @@ # guard to avoid ImportError when running tests through sbuild # as there is no Debian package built for swh-docs -if importlib.util.find_spec('swh.docs'): +if importlib.util.find_spec("swh.docs"): from swh.docs.sphinx.conf import setup as orig_setup @@ -21,6 +21,7 @@ Custom autodoc directive to display a docstring unindented and without function signature header. """ + objtype = "simple" # ensure the priority is lesser than the base FunctionDocumenter # to avoid side effects with autodoc processing @@ -34,16 +35,17 @@ pass -_swh_web_base_url = 'https://archive.softwareheritage.org' -_swh_web_api_endpoint = 'api' +_swh_web_base_url = "https://archive.softwareheritage.org" +_swh_web_api_endpoint = "api" _swh_web_api_version = 1 -_swh_web_api_url = '%s/%s/%s/' % (_swh_web_base_url, - _swh_web_api_endpoint, - _swh_web_api_version) +_swh_web_api_url = "%s/%s/%s/" % ( + _swh_web_base_url, + _swh_web_api_endpoint, + _swh_web_api_version, +) -_swh_web_browse_endpoint = 'browse' -_swh_web_browse_url = '%s/%s/' % (_swh_web_base_url, - _swh_web_browse_endpoint) +_swh_web_browse_endpoint = "browse" +_swh_web_browse_url = "%s/%s/" % (_swh_web_base_url, _swh_web_browse_endpoint) def setup(app): @@ -51,7 +53,7 @@ app.add_autodocumenter(SimpleDocumenter) # set an environment variable indicating we are currently # building the swh-web documentation - os.environ['SWH_WEB_DOC_BUILD'] = '1' + os.environ["SWH_WEB_DOC_BUILD"] = "1" def customize_sphinx_conf(sphinx_conf): @@ -65,9 +67,9 @@ used to build the doc. """ # fix for sphinxcontrib.httpdomain 1.3 - if 'Link' not in httpdomain.HEADER_REFS: - httpdomain.HEADER_REFS['Link'] = httpdomain.IETFRef(5988, '5') - sphinx_conf.extlinks['swh_web'] = (_swh_web_base_url + '/%s', None) - sphinx_conf.extlinks['swh_web_api'] = (_swh_web_api_url + '%s', None) - sphinx_conf.extlinks['swh_web_browse'] = (_swh_web_browse_url + '%s', None) + if "Link" not in httpdomain.HEADER_REFS: + httpdomain.HEADER_REFS["Link"] = httpdomain.IETFRef(5988, "5") + sphinx_conf.extlinks["swh_web"] = (_swh_web_base_url + "/%s", None) + sphinx_conf.extlinks["swh_web_api"] = (_swh_web_api_url + "%s", None) + sphinx_conf.extlinks["swh_web_browse"] = (_swh_web_browse_url + "%s", None) sphinx_conf.setup = setup diff --git a/swh/web/manage.py b/swh/web/manage.py --- a/swh/web/manage.py +++ b/swh/web/manage.py @@ -14,26 +14,24 @@ # the serving of static assets in development mode is handled # in swh/web/urls.py, we pass the nostatic options to runserver # in order to have gzip compression enabled. 
- swh_web_config['serve_assets'] = '--nostatic' in sys.argv + swh_web_config["serve_assets"] = "--nostatic" in sys.argv # import root urls module for swh-web before running the django dev server # in order to ensure it will be automatically reloaded when source files # are modified (as django autoreload feature only works if the modules are # in sys.modules) try: - from swh.web import urls # noqa + from swh.web import urls # noqa except Exception: pass try: - from django.core.management.commands.runserver import ( - Command as runserver - ) + from django.core.management.commands.runserver import Command as runserver from django.core.management import execute_from_command_line except ImportError: # The above import may fail for some other reason. Ensure that the # issue is really that Django is missing to avoid masking other # exceptions on Python 2. try: - import django # noqa + import django # noqa except ImportError: raise ImportError( "Couldn't import Django. Are you sure it's installed and " @@ -41,6 +39,6 @@ "forget to activate a virtual environment?" ) raise - runserver.default_port = swh_web_config['port'] - runserver.default_addr = swh_web_config['host'] + runserver.default_port = swh_web_config["port"] + runserver.default_addr = swh_web_config["host"] execute_from_command_line(sys.argv) diff --git a/swh/web/misc/badges.py b/swh/web/misc/badges.py --- a/swh/web/misc/badges.py +++ b/swh/web/misc/badges.py @@ -14,8 +14,14 @@ from swh.model.exceptions import ValidationError from swh.model.identifiers import ( - persistent_identifier, parse_persistent_identifier, - CONTENT, DIRECTORY, ORIGIN, RELEASE, REVISION, SNAPSHOT + persistent_identifier, + parse_persistent_identifier, + CONTENT, + DIRECTORY, + ORIGIN, + RELEASE, + REVISION, + SNAPSHOT, ) from swh.web.common import service from swh.web.common.exc import BadInputExc, NotFoundExc @@ -23,41 +29,20 @@ from swh.web.common.utils import reverse -_orange = '#f36a24' -_blue = '#0172b2' -_red = '#cd5741' +_orange = "#f36a24" +_blue = "#0172b2" +_red = "#cd5741" _swh_logo_data = None _badge_config = { - CONTENT: { - 'color': _blue, - 'title': 'Archived source file', - }, - DIRECTORY: { - 'color': _blue, - 'title': 'Archived source tree', - }, - ORIGIN: { - 'color': _orange, - 'title': 'Archived software repository', - }, - RELEASE: { - 'color': _blue, - 'title': 'Archived software release', - }, - REVISION: { - 'color': _blue, - 'title': 'Archived commit', - }, - SNAPSHOT: { - 'color': _blue, - 'title': 'Archived software repository snapshot', - }, - 'error': { - 'color': _red, - 'title': 'An error occurred when generating the badge' - } + CONTENT: {"color": _blue, "title": "Archived source file",}, + DIRECTORY: {"color": _blue, "title": "Archived source tree",}, + ORIGIN: {"color": _orange, "title": "Archived software repository",}, + RELEASE: {"color": _blue, "title": "Archived software release",}, + REVISION: {"color": _blue, "title": "Archived commit",}, + SNAPSHOT: {"color": _blue, "title": "Archived software repository snapshot",}, + "error": {"color": _red, "title": "An error occurred when generating the badge"}, } @@ -68,15 +53,20 @@ """ global _swh_logo_data if _swh_logo_data is None: - swh_logo_path = cast(str, finders.find('img/swh-logo-white.svg')) - with open(swh_logo_path, 'rb') as swh_logo_file: - _swh_logo_data = ('data:image/svg+xml;base64,%s' % - b64encode(swh_logo_file.read()).decode('ascii')) + swh_logo_path = cast(str, finders.find("img/swh-logo-white.svg")) + with open(swh_logo_path, "rb") as swh_logo_file: + 
_swh_logo_data = "data:image/svg+xml;base64,%s" % b64encode( + swh_logo_file.read() + ).decode("ascii") return _swh_logo_data -def _swh_badge(request: HttpRequest, object_type: str, object_id: str, - object_pid: Optional[str] = '') -> HttpResponse: +def _swh_badge( + request: HttpRequest, + object_type: str, + object_id: str, + object_pid: Optional[str] = "", +) -> HttpResponse: """ Generate a Software Heritage badge for a given object type and id. @@ -97,15 +87,14 @@ HTTP 404 status code will be returned. """ - left_text = 'error' + left_text = "error" whole_link = None try: if object_type == ORIGIN: - service.lookup_origin({'url': object_id}) - right_text = 'repository' - whole_link = reverse('browse-origin', - url_args={'origin_url': object_id}) + service.lookup_origin({"url": object_id}) + right_text = "repository" + whole_link = reverse("browse-origin", url_args={"origin_url": object_id}) else: # when pid is provided, object type and id will be parsed # from it @@ -119,30 +108,32 @@ else: right_text = persistent_identifier(object_type, object_id) - whole_link = resolve_swh_persistent_id(right_text)['browse_url'] + whole_link = resolve_swh_persistent_id(right_text)["browse_url"] # remove pid metadata if any for badge text if object_pid: - right_text = right_text.split(';')[0] + right_text = right_text.split(";")[0] # use release name for badge text if object_type == RELEASE: - right_text = 'release %s' % swh_object['name'] - left_text = 'archived' + right_text = "release %s" % swh_object["name"] + left_text = "archived" except (BadInputExc, ValidationError): right_text = f'invalid {object_type if object_type else "object"} id' - object_type = 'error' + object_type = "error" except NotFoundExc: right_text = f'{object_type if object_type else "object"} not found' - object_type = 'error' + object_type = "error" - badge_data = badge(left_text=left_text, - right_text=right_text, - right_color=_badge_config[object_type]['color'], - whole_link=request.build_absolute_uri(whole_link), - whole_title=_badge_config[object_type]['title'], - logo=_get_logo_data(), - embed_logo=True) + badge_data = badge( + left_text=left_text, + right_text=right_text, + right_color=_badge_config[object_type]["color"], + whole_link=request.build_absolute_uri(whole_link), + whole_title=_badge_config[object_type]["title"], + logo=_get_logo_data(), + embed_logo=True, + ) - return HttpResponse(badge_data, content_type='image/svg+xml') + return HttpResponse(badge_data, content_type="image/svg+xml") def _swh_badge_pid(request: HttpRequest, object_pid: str) -> HttpResponse: @@ -159,12 +150,18 @@ *image/svg+xml* containing the SVG badge data. If any error occurs, a status code of 400 will be returned. 
""" - return _swh_badge(request, '', '', object_pid) + return _swh_badge(request, "", "", object_pid) urlpatterns = [ - url(r'^badge/(?P[a-z]+)/(?P.+)/$', _swh_badge, - name='swh-badge'), - url(r'^badge/(?Pswh:[0-9]+:[a-z]+:[0-9a-f]+.*)/$', - _swh_badge_pid, name='swh-badge-pid'), + url( + r"^badge/(?P[a-z]+)/(?P.+)/$", + _swh_badge, + name="swh-badge", + ), + url( + r"^badge/(?Pswh:[0-9]+:[a-z]+:[0-9a-f]+.*)/$", + _swh_badge_pid, + name="swh-badge-pid", + ), ] diff --git a/swh/web/misc/coverage.py b/swh/web/misc/coverage.py --- a/swh/web/misc/coverage.py +++ b/swh/web/misc/coverage.py @@ -21,131 +21,129 @@ # TODO: Retrieve that list dynamically instead of hardcoding it _code_providers = [ { - 'provider_id': 'bitbucket', - 'provider_url': 'https://bitbucket.org/', - 'provider_logo': 'img/logos/bitbucket.png', - 'provider_info': 'public repositories from Bitbucket ' - '(continuously archived)', - 'origin_url_regexp': '^https://bitbucket.org/', - 'origin_types': 'repositories', + "provider_id": "bitbucket", + "provider_url": "https://bitbucket.org/", + "provider_logo": "img/logos/bitbucket.png", + "provider_info": "public repositories from Bitbucket " + "(continuously archived)", + "origin_url_regexp": "^https://bitbucket.org/", + "origin_types": "repositories", }, { - 'provider_id': 'cran', - 'provider_url': 'https://cran.r-project.org/', - 'provider_logo': 'img/logos/cran.svg', - 'provider_info': 'source packages from The Comprehensive R Archive ' - 'Network (continuously archived)', - 'origin_url_regexp': '^https://cran.r-project.org/', - 'origin_types': 'packages', + "provider_id": "cran", + "provider_url": "https://cran.r-project.org/", + "provider_logo": "img/logos/cran.svg", + "provider_info": "source packages from The Comprehensive R Archive " + "Network (continuously archived)", + "origin_url_regexp": "^https://cran.r-project.org/", + "origin_types": "packages", }, { - 'provider_id': 'debian', - 'provider_url': 'https://www.debian.org/', - 'provider_logo': 'img/logos/debian.png', - 'provider_info': 'source packages from the Debian distribution ' - '(continuously archived)', - 'origin_url_regexp': '^deb://', - 'origin_types': 'packages', + "provider_id": "debian", + "provider_url": "https://www.debian.org/", + "provider_logo": "img/logos/debian.png", + "provider_info": "source packages from the Debian distribution " + "(continuously archived)", + "origin_url_regexp": "^deb://", + "origin_types": "packages", }, { - 'provider_id': 'framagit', - 'provider_url': 'https://framagit.org/', - 'provider_logo': 'img/logos/framagit.png', - 'provider_info': 'public repositories from Framagit ' - '(continuously archived)', - 'origin_url_regexp': '^https://framagit.org/', - 'origin_types': 'repositories', + "provider_id": "framagit", + "provider_url": "https://framagit.org/", + "provider_logo": "img/logos/framagit.png", + "provider_info": "public repositories from Framagit " "(continuously archived)", + "origin_url_regexp": "^https://framagit.org/", + "origin_types": "repositories", }, { - 'provider_id': 'github', - 'provider_url': 'https://github.com', - 'provider_logo': 'img/logos/github.png', - 'provider_info': 'public repositories from GitHub ' - '(continuously archived)', - 'origin_url_regexp': '^https://github.com/', - 'origin_types': 'repositories', + "provider_id": "github", + "provider_url": "https://github.com", + "provider_logo": "img/logos/github.png", + "provider_info": "public repositories from GitHub " "(continuously archived)", + "origin_url_regexp": "^https://github.com/", + 
"origin_types": "repositories", }, { - 'provider_id': 'gitlab', - 'provider_url': 'https://gitlab.com', - 'provider_logo': 'img/logos/gitlab.svg', - 'provider_info': 'public repositories from GitLab ' - '(continuously archived)', - 'origin_url_regexp': '^https://gitlab.com/', - 'origin_types': 'repositories', + "provider_id": "gitlab", + "provider_url": "https://gitlab.com", + "provider_logo": "img/logos/gitlab.svg", + "provider_info": "public repositories from GitLab " "(continuously archived)", + "origin_url_regexp": "^https://gitlab.com/", + "origin_types": "repositories", }, { - 'provider_id': 'gitorious', - 'provider_url': 'https://gitorious.org/', - 'provider_logo': 'img/logos/gitorious.png', - 'provider_info': 'public repositories from the former Gitorious code ' - 'hosting service', - 'origin_url_regexp': '^https://gitorious.org/', - 'origin_types': 'repositories', + "provider_id": "gitorious", + "provider_url": "https://gitorious.org/", + "provider_logo": "img/logos/gitorious.png", + "provider_info": "public repositories from the former Gitorious code " + "hosting service", + "origin_url_regexp": "^https://gitorious.org/", + "origin_types": "repositories", }, { - 'provider_id': 'googlecode', - 'provider_url': 'https://code.google.com/archive/', - 'provider_logo': 'img/logos/googlecode.png', - 'provider_info': 'public repositories from the former Google Code ' - 'project hosting service', - 'origin_url_regexp': '^http.*.googlecode.com/', - 'origin_types': 'repositories', + "provider_id": "googlecode", + "provider_url": "https://code.google.com/archive/", + "provider_logo": "img/logos/googlecode.png", + "provider_info": "public repositories from the former Google Code " + "project hosting service", + "origin_url_regexp": "^http.*.googlecode.com/", + "origin_types": "repositories", }, { - 'provider_id': 'gnu', - 'provider_url': 'https://www.gnu.org', - 'provider_logo': 'img/logos/gnu.png', - 'provider_info': 'releases from the GNU project (as of August 2015)', - 'origin_url_regexp': '^rsync://ftp.gnu.org/', - 'origin_types': 'releases', + "provider_id": "gnu", + "provider_url": "https://www.gnu.org", + "provider_logo": "img/logos/gnu.png", + "provider_info": "releases from the GNU project (as of August 2015)", + "origin_url_regexp": "^rsync://ftp.gnu.org/", + "origin_types": "releases", }, { - 'provider_id': 'hal', - 'provider_url': 'https://hal.archives-ouvertes.fr/', - 'provider_logo': 'img/logos/hal.png', - 'provider_info': 'scientific software source code deposited in the ' - 'open archive HAL', - 'origin_url_regexp': '^https://hal.archives-ouvertes.fr/', - 'origin_types': 'deposits', - + "provider_id": "hal", + "provider_url": "https://hal.archives-ouvertes.fr/", + "provider_logo": "img/logos/hal.png", + "provider_info": "scientific software source code deposited in the " + "open archive HAL", + "origin_url_regexp": "^https://hal.archives-ouvertes.fr/", + "origin_types": "deposits", }, { - 'provider_id': 'inria', - 'provider_url': 'https://gitlab.inria.fr', - 'provider_logo': 'img/logos/inria.jpg', - 'provider_info': 'public repositories from Inria GitLab ' - '(continuously archived)', - 'origin_url_regexp': '^https://gitlab.inria.fr/', - 'origin_types': 'repositories', + "provider_id": "inria", + "provider_url": "https://gitlab.inria.fr", + "provider_logo": "img/logos/inria.jpg", + "provider_info": "public repositories from Inria GitLab " + "(continuously archived)", + "origin_url_regexp": "^https://gitlab.inria.fr/", + "origin_types": "repositories", }, { - 'provider_id': 
'npm', - 'provider_url': 'https://www.npmjs.com/', - 'provider_logo': 'img/logos/npm.png', - 'provider_info': 'public packages from the package registry for ' - 'javascript (continuously archived)', - 'origin_url_regexp': '^https://www.npmjs.com/', - 'origin_types': 'packages', + "provider_id": "npm", + "provider_url": "https://www.npmjs.com/", + "provider_logo": "img/logos/npm.png", + "provider_info": "public packages from the package registry for " + "javascript (continuously archived)", + "origin_url_regexp": "^https://www.npmjs.com/", + "origin_types": "packages", }, { - 'provider_id': 'pypi', - 'provider_url': 'https://pypi.org', - 'provider_logo': 'img/logos/pypi.svg', - 'provider_info': 'source packages from the Python Packaging Index ' - '(continuously archived)', - 'origin_url_regexp': '^https://pypi.org/', - 'origin_types': 'packages', + "provider_id": "pypi", + "provider_url": "https://pypi.org", + "provider_logo": "img/logos/pypi.svg", + "provider_info": "source packages from the Python Packaging Index " + "(continuously archived)", + "origin_url_regexp": "^https://pypi.org/", + "origin_types": "packages", }, ] @xframe_options_exempt def _swh_coverage(request): - count_origins = get_config()['coverage_count_origins'] - return render(request, 'misc/coverage.html', - {'providers': _code_providers, - 'count_origins': count_origins}) + count_origins = get_config()["coverage_count_origins"] + return render( + request, + "misc/coverage.html", + {"providers": _code_providers, "count_origins": count_origins}, + ) @never_cache @@ -158,42 +156,43 @@ the same count query twice to the storage database. """ try: - cache = caches['db_cache'] + cache = caches["db_cache"] results = [] for code_provider in _code_providers: - provider_id = code_provider['provider_id'] - url_regexp = code_provider['origin_url_regexp'] - cache_key = '%s_origins_count' % provider_id - prev_cache_key = '%s_origins_prev_count' % provider_id + provider_id = code_provider["provider_id"] + url_regexp = code_provider["origin_url_regexp"] + cache_key = "%s_origins_count" % provider_id + prev_cache_key = "%s_origins_prev_count" % provider_id # get cached origin count origin_count = cache.get(cache_key, -2) # cache entry has expired or does not exist if origin_count == -2: # mark the origin count as processing - cache.set(cache_key, -1, timeout=10*60) + cache.set(cache_key, -1, timeout=10 * 60) # execute long count query - origin_count = service.storage.origin_count(url_regexp, - regexp=True) + origin_count = service.storage.origin_count(url_regexp, regexp=True) # cache count result - cache.set(cache_key, origin_count, timeout=24*60*60) + cache.set(cache_key, origin_count, timeout=24 * 60 * 60) cache.set(prev_cache_key, origin_count, timeout=None) # origin count is currently processing elif origin_count == -1: # return previous count if it exists origin_count = cache.get(prev_cache_key, -1) - results.append({ - 'provider_id': provider_id, - 'origin_count': origin_count, - 'origin_types': code_provider['origin_types'] - }) + results.append( + { + "provider_id": provider_id, + "origin_count": origin_count, + "origin_types": code_provider["origin_types"], + } + ) results = json.dumps(results) except Exception as exc: return handle_view_exception(request, exc, html_response=False) - return HttpResponse(results, content_type='application/json') + return HttpResponse(results, content_type="application/json") urlpatterns = [ - url(r'^coverage/$', _swh_coverage, name='swh-coverage'), - url(r'^coverage/count/$', _swh_coverage_count, 
name='swh-coverage-count'), + url(r"^coverage/$", _swh_coverage, name="swh-coverage"), + url(r"^coverage/count/$", _swh_coverage_count, name="swh-coverage-count"), ] diff --git a/swh/web/misc/metrics.py b/swh/web/misc/metrics.py --- a/swh/web/misc/metrics.py +++ b/swh/web/misc/metrics.py @@ -16,4 +16,5 @@ return HttpResponse( content=generate_latest(registry=SWH_WEB_METRICS_REGISTRY), - content_type=CONTENT_TYPE_LATEST) + content_type=CONTENT_TYPE_LATEST, + ) diff --git a/swh/web/misc/origin_save.py b/swh/web/misc/origin_save.py --- a/swh/web/misc/origin_save.py +++ b/swh/web/misc/origin_save.py @@ -7,9 +7,7 @@ from django.conf.urls import url from django.core.paginator import Paginator -from django.http import ( - HttpResponse, HttpResponseForbidden, HttpResponseServerError -) +from django.http import HttpResponse, HttpResponseForbidden, HttpResponseServerError from django.shortcuts import render from rest_framework.decorators import api_view, authentication_classes @@ -18,21 +16,24 @@ from swh.web.common.exc import ForbiddenExc from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import ( - create_save_origin_request, get_savable_visit_types, - get_save_origin_requests_from_queryset + create_save_origin_request, + get_savable_visit_types, + get_save_origin_requests_from_queryset, ) from swh.web.common.utils import EnforceCSRFAuthentication def _origin_save_view(request): - return render(request, 'misc/origin-save.html', - {'heading': ('Request the saving of a software origin into ' - 'the archive')}) + return render( + request, + "misc/origin-save.html", + {"heading": ("Request the saving of a software origin into " "the archive")}, + ) -@api_view(['POST']) -@authentication_classes((EnforceCSRFAuthentication, )) -@throttle_scope('swh_save_origin') +@api_view(["POST"]) +@authentication_classes((EnforceCSRFAuthentication,)) +@throttle_scope("swh_save_origin") def _origin_save_request(request, visit_type, origin_url): """ This view is called through AJAX from the save code now form of swh-web. @@ -40,69 +41,77 @@ per user to avoid being possibly flooded by bots. 
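Example (hypothetical origin, for illustration only): a save request for a git repository could be submitted with POST /save/git/url/https://example.org/user/repo/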
""" try: - response = json.dumps(create_save_origin_request(visit_type, - origin_url), - separators=(',', ': ')) - return HttpResponse(response, content_type='application/json') + response = json.dumps( + create_save_origin_request(visit_type, origin_url), separators=(",", ": ") + ) + return HttpResponse(response, content_type="application/json") except ForbiddenExc as exc: - return HttpResponseForbidden(json.dumps({'detail': str(exc)}), - content_type='application/json') + return HttpResponseForbidden( + json.dumps({"detail": str(exc)}), content_type="application/json" + ) except Exception as exc: - return HttpResponseServerError(json.dumps({'detail': str(exc)}), - content_type='application/json') + return HttpResponseServerError( + json.dumps({"detail": str(exc)}), content_type="application/json" + ) def _visit_save_types_list(request): - visit_types = json.dumps(get_savable_visit_types(), - separators=(',', ': ')) - return HttpResponse(visit_types, content_type='application/json') + visit_types = json.dumps(get_savable_visit_types(), separators=(",", ": ")) + return HttpResponse(visit_types, content_type="application/json") def _origin_save_requests_list(request, status): - if status != 'all': + if status != "all": save_requests = SaveOriginRequest.objects.filter(status=status) else: save_requests = SaveOriginRequest.objects.all() table_data = {} - table_data['recordsTotal'] = save_requests.count() - table_data['draw'] = int(request.GET['draw']) + table_data["recordsTotal"] = save_requests.count() + table_data["draw"] = int(request.GET["draw"]) - search_value = request.GET['search[value]'] + search_value = request.GET["search[value]"] - column_order = request.GET['order[0][column]'] - field_order = request.GET['columns[%s][name]' % column_order] - order_dir = request.GET['order[0][dir]'] - if order_dir == 'desc': - field_order = '-' + field_order + column_order = request.GET["order[0][column]"] + field_order = request.GET["columns[%s][name]" % column_order] + order_dir = request.GET["order[0][dir]"] + if order_dir == "desc": + field_order = "-" + field_order save_requests = save_requests.order_by(field_order) - length = int(request.GET['length']) - page = int(request.GET['start']) / length + 1 + length = int(request.GET["length"]) + page = int(request.GET["start"]) / length + 1 save_requests = get_save_origin_requests_from_queryset(save_requests) if search_value: - save_requests = \ - [sr for sr in save_requests - if search_value.lower() in sr['save_request_status'].lower() - or search_value.lower() in sr['save_task_status'].lower() - or search_value.lower() in sr['visit_type'].lower() - or search_value.lower() in sr['origin_url'].lower()] - - table_data['recordsFiltered'] = len(save_requests) + save_requests = [ + sr + for sr in save_requests + if search_value.lower() in sr["save_request_status"].lower() + or search_value.lower() in sr["save_task_status"].lower() + or search_value.lower() in sr["visit_type"].lower() + or search_value.lower() in sr["origin_url"].lower() + ] + + table_data["recordsFiltered"] = len(save_requests) paginator = Paginator(save_requests, length) - table_data['data'] = paginator.page(page).object_list - table_data_json = json.dumps(table_data, separators=(',', ': ')) - return HttpResponse(table_data_json, content_type='application/json') + table_data["data"] = paginator.page(page).object_list + table_data_json = json.dumps(table_data, separators=(",", ": ")) + return HttpResponse(table_data_json, content_type="application/json") urlpatterns = [ - 
url(r'^save/$', _origin_save_view, name='origin-save'), - url(r'^save/(?P<visit_type>.+)/url/(?P<origin_url>.+)/$', - _origin_save_request, name='origin-save-request'), - url(r'^save/types/list/$', _visit_save_types_list, - name='origin-save-types-list'), - url(r'^save/requests/list/(?P<status>.+)/$', _origin_save_requests_list, - name='origin-save-requests-list'), + url(r"^save/$", _origin_save_view, name="origin-save"), + url( + r"^save/(?P<visit_type>.+)/url/(?P<origin_url>.+)/$", + _origin_save_request, + name="origin-save-request", + ), + url(r"^save/types/list/$", _visit_save_types_list, name="origin-save-types-list"), + url( + r"^save/requests/list/(?P<status>.+)/$", + _origin_save_requests_list, + name="origin-save-requests-list", + ), ] diff --git a/swh/web/misc/urls.py b/swh/web/misc/urls.py --- a/swh/web/misc/urls.py +++ b/swh/web/misc/urls.py @@ -19,18 +19,18 @@ def _jslicenses(request): - jslicenses_file = finders.find('jssources/jslicenses.json') + jslicenses_file = finders.find("jssources/jslicenses.json") jslicenses_data = json.load(open(jslicenses_file)) - jslicenses_data = sorted(jslicenses_data.items(), - key=lambda item: item[0].split('/')[-1]) - return render(request, "misc/jslicenses.html", - {'jslicenses_data': jslicenses_data}) + jslicenses_data = sorted( + jslicenses_data.items(), key=lambda item: item[0].split("/")[-1] + ) + return render(request, "misc/jslicenses.html", {"jslicenses_data": jslicenses_data}) def _stat_counters(request): stat = service.stat_counters() - url = get_config()['history_counters_url'] - stat_counters_history = 'null' + url = get_config()["history_counters_url"] + stat_counters_history = "null" if url: try: response = requests.get(url, timeout=5) @@ -38,46 +38,65 @@ except Exception as exc: sentry_sdk.capture_exception(exc) json_data = '{"stat_counters": %s, "stat_counters_history": %s}' % ( - json.dumps(stat), stat_counters_history) - return HttpResponse(json_data, content_type='application/json') + json.dumps(stat), + stat_counters_history, + ) + return HttpResponse(json_data, content_type="application/json") urlpatterns = [ - url(r'^', include('swh.web.misc.coverage')), - url(r'^jslicenses/$', _jslicenses, name='jslicenses'), - url(r'^', include('swh.web.misc.origin_save')), - url(r'^stat_counters/', _stat_counters, name='stat-counters'), - url(r'^', include('swh.web.misc.badges')), - url(r'^metrics/prometheus/$', prometheus_metrics, - name='metrics-prometheus'), + url(r"^", include("swh.web.misc.coverage")), + url(r"^jslicenses/$", _jslicenses, name="jslicenses"), + url(r"^", include("swh.web.misc.origin_save")), + url(r"^stat_counters/", _stat_counters, name="stat-counters"), + url(r"^", include("swh.web.misc.badges")), + url(r"^metrics/prometheus/$", prometheus_metrics, name="metrics-prometheus"), ] # when running end to end tests through cypress, declare some extra # endpoints to provide input data for some of those tests -if get_config()['e2e_tests_mode']: +if get_config()["e2e_tests_mode"]: from swh.web.tests.views import ( get_content_code_data_by_ext, get_content_other_data_by_ext, get_content_code_data_all_exts, get_content_code_data_by_filename, get_content_code_data_all_filenames, - ) # noqa + ) + urlpatterns.append( - url(r'^tests/data/content/code/extension/(?P<ext>.+)/$', + url( + r"^tests/data/content/code/extension/(?P<ext>.+)/$", get_content_code_data_by_ext, - name='tests-content-code-extension')) + name="tests-content-code-extension", + ) + ) urlpatterns.append( - url(r'^tests/data/content/other/extension/(?P<ext>.+)/$', + url( + r"^tests/data/content/other/extension/(?P<ext>.+)/$",
get_content_other_data_by_ext, - name='tests-content-other-extension')) - urlpatterns.append(url(r'^tests/data/content/code/extensions/$', - get_content_code_data_all_exts, - name='tests-content-code-extensions')) + name="tests-content-other-extension", + ) + ) + urlpatterns.append( + url( + r"^tests/data/content/code/extensions/$", + get_content_code_data_all_exts, + name="tests-content-code-extensions", + ) + ) urlpatterns.append( - url(r'^tests/data/content/code/filename/(?P<filename>.+)/$', + url( + r"^tests/data/content/code/filename/(?P<filename>.+)/$", get_content_code_data_by_filename, - name='tests-content-code-filename')) - urlpatterns.append(url(r'^tests/data/content/code/filenames/$', - get_content_code_data_all_filenames, - name='tests-content-code-filenames')) + name="tests-content-code-filename", + ) + ) + urlpatterns.append( + url( + r"^tests/data/content/code/filenames/$", + get_content_code_data_all_filenames, + name="tests-content-code-filenames", + ) + ) diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py --- a/swh/web/settings/common.py +++ b/swh/web/settings/common.py @@ -24,79 +24,77 @@ # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = swh_web_config['secret_key'] +SECRET_KEY = swh_web_config["secret_key"] # SECURITY WARNING: don't run with debug turned on in production! -DEBUG = swh_web_config['debug'] -DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config['debug'] +DEBUG = swh_web_config["debug"] +DEBUG_PROPAGATE_EXCEPTIONS = swh_web_config["debug"] -ALLOWED_HOSTS = ['127.0.0.1', 'localhost'] + swh_web_config['allowed_hosts'] +ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + swh_web_config["allowed_hosts"] # Application definition INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'rest_framework', - 'swh.web.common', - 'swh.web.api', - 'swh.web.browse', - 'webpack_loader', - 'django_js_reverse', - 'corsheaders', + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "rest_framework", + "swh.web.common", + "swh.web.api", + "swh.web.browse", + "webpack_loader", + "django_js_reverse", + "corsheaders", ] MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'corsheaders.middleware.CorsMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'swh.web.auth.middlewares.OIDCSessionRefreshMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', - 'swh.web.common.middlewares.ThrottlingHeadersMiddleware', + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "corsheaders.middleware.CorsMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "swh.web.auth.middlewares.OIDCSessionRefreshMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", + "swh.web.common.middlewares.ThrottlingHeadersMiddleware", ] # Compress all assets (static ones
and dynamically generated html) # served by django in a local development environment context. # In a production environment, assets compression will be directly # handled by web servers like apache or nginx. -if swh_web_config['serve_assets']: - MIDDLEWARE.insert(0, 'django.middleware.gzip.GZipMiddleware') +if swh_web_config["serve_assets"]: + MIDDLEWARE.insert(0, "django.middleware.gzip.GZipMiddleware") -ROOT_URLCONF = 'swh.web.urls' +ROOT_URLCONF = "swh.web.urls" TEMPLATES = [ { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [os.path.join(PROJECT_DIR, "../templates")], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - 'swh.web.common.utils.context_processor' + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [os.path.join(PROJECT_DIR, "../templates")], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + "swh.web.common.utils.context_processor", ], - 'libraries': { - 'swh_templatetags': 'swh.web.common.swh_templatetags', - }, + "libraries": {"swh_templatetags": "swh.web.common.swh_templatetags",}, }, }, ] DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': swh_web_config['development_db'], + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": swh_web_config["development_db"], } } @@ -105,26 +103,20 @@ AUTH_PASSWORD_VALIDATORS = [ { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa - }, - { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa - }, - { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, + {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",}, + {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",}, + {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",}, ] # Internationalization # https://docs.djangoproject.com/en/1.11/topics/i18n/ -LANGUAGE_CODE = 'en-us' +LANGUAGE_CODE = "en-us" -TIME_ZONE = 'UTC' +TIME_ZONE = "UTC" USE_I18N = True @@ -135,169 +127,158 @@ # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ -STATIC_URL = '/static/' +STATIC_URL = "/static/" # static folder location when swh-web has been installed with pip -STATIC_DIR = os.path.join(sys.prefix, 'share/swh/web/static') +STATIC_DIR = os.path.join(sys.prefix, "share/swh/web/static") if not os.path.exists(STATIC_DIR): # static folder location when developping swh-web - STATIC_DIR = os.path.join(PROJECT_DIR, '../../../static') + STATIC_DIR = os.path.join(PROJECT_DIR, "../../../static") STATICFILES_DIRS = [STATIC_DIR] -INTERNAL_IPS = ['127.0.0.1'] +INTERNAL_IPS = ["127.0.0.1"] throttle_rates = {} -http_requests = ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'PATCH'] +http_requests = ["GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"] -throttling = swh_web_config['throttling'] -for 
limiter_scope, limiter_conf in throttling['scopes'].items(): - if 'default' in limiter_conf['limiter_rate']: - throttle_rates[limiter_scope] = limiter_conf['limiter_rate']['default'] +throttling = swh_web_config["throttling"] +for limiter_scope, limiter_conf in throttling["scopes"].items(): + if "default" in limiter_conf["limiter_rate"]: + throttle_rates[limiter_scope] = limiter_conf["limiter_rate"]["default"] # for backward compatibility else: - throttle_rates[limiter_scope] = limiter_conf['limiter_rate'] + throttle_rates[limiter_scope] = limiter_conf["limiter_rate"] # register sub scopes specific for HTTP request types for http_request in http_requests: - if http_request in limiter_conf['limiter_rate']: - throttle_rates[limiter_scope + '_' + http_request.lower()] = \ - limiter_conf['limiter_rate'][http_request] + if http_request in limiter_conf["limiter_rate"]: + throttle_rates[limiter_scope + "_" + http_request.lower()] = limiter_conf[ + "limiter_rate" + ][http_request] REST_FRAMEWORK: Dict[str, Any] = { - 'DEFAULT_RENDERER_CLASSES': ( - 'rest_framework.renderers.JSONRenderer', - 'swh.web.api.renderers.YAMLRenderer', - 'rest_framework.renderers.TemplateHTMLRenderer' - ), - 'DEFAULT_THROTTLE_CLASSES': ( - 'swh.web.api.throttling.SwhWebRateThrottle', + "DEFAULT_RENDERER_CLASSES": ( + "rest_framework.renderers.JSONRenderer", + "swh.web.api.renderers.YAMLRenderer", + "rest_framework.renderers.TemplateHTMLRenderer", ), - 'DEFAULT_THROTTLE_RATES': throttle_rates, - 'DEFAULT_AUTHENTICATION_CLASSES': [ - 'rest_framework.authentication.SessionAuthentication', - 'swh.web.auth.backends.OIDCBearerTokenAuthentication', + "DEFAULT_THROTTLE_CLASSES": ("swh.web.api.throttling.SwhWebRateThrottle",), + "DEFAULT_THROTTLE_RATES": throttle_rates, + "DEFAULT_AUTHENTICATION_CLASSES": [ + "rest_framework.authentication.SessionAuthentication", + "swh.web.auth.backends.OIDCBearerTokenAuthentication", ], } LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'filters': { - 'require_debug_false': { - '()': 'django.utils.log.RequireDebugFalse', - }, - 'require_debug_true': { - '()': 'django.utils.log.RequireDebugTrue', - }, + "version": 1, + "disable_existing_loggers": False, + "filters": { + "require_debug_false": {"()": "django.utils.log.RequireDebugFalse",}, + "require_debug_true": {"()": "django.utils.log.RequireDebugTrue",}, }, - 'formatters': { - 'request': { - 'format': '[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s', # noqa - 'datefmt': "%d/%b/%Y %H:%M:%S" + "formatters": { + "request": { + "format": "[%(asctime)s] [%(levelname)s] %(request)s %(status_code)s", + "datefmt": "%d/%b/%Y %H:%M:%S", }, - 'simple': { - 'format': '[%(asctime)s] [%(levelname)s] %(message)s', - 'datefmt': "%d/%b/%Y %H:%M:%S" + "simple": { + "format": "[%(asctime)s] [%(levelname)s] %(message)s", + "datefmt": "%d/%b/%Y %H:%M:%S", }, - 'verbose': { - 'format': '[%(asctime)s] [%(levelname)s] %(name)s.%(funcName)s:%(lineno)s - %(message)s', # noqa - 'datefmt': "%d/%b/%Y %H:%M:%S" + "verbose": { + "format": ( + "[%(asctime)s] [%(levelname)s] %(name)s.%(funcName)s:%(lineno)s " + "- %(message)s" + ), + "datefmt": "%d/%b/%Y %H:%M:%S", }, }, - 'handlers': { - 'console': { - 'level': 'DEBUG', - 'filters': ['require_debug_true'], - 'class': 'logging.StreamHandler', - 'formatter': 'simple' - }, - 'file': { - 'level': 'WARNING', - 'filters': ['require_debug_false'], - 'class': 'logging.FileHandler', - 'filename': os.path.join(swh_web_config['log_dir'], 'swh-web.log'), - 'formatter': 'simple' + "handlers": { + "console": { 
+ "level": "DEBUG", + "filters": ["require_debug_true"], + "class": "logging.StreamHandler", + "formatter": "simple", }, - 'file_request': { - 'level': 'WARNING', - 'filters': ['require_debug_false'], - 'class': 'logging.FileHandler', - 'filename': os.path.join(swh_web_config['log_dir'], 'swh-web.log'), - 'formatter': 'request' + "file": { + "level": "WARNING", + "filters": ["require_debug_false"], + "class": "logging.FileHandler", + "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), + "formatter": "simple", }, - 'console_verbose': { - 'level': 'DEBUG', - 'filters': ['require_debug_true'], - 'class': 'logging.StreamHandler', - 'formatter': 'verbose' + "file_request": { + "level": "WARNING", + "filters": ["require_debug_false"], + "class": "logging.FileHandler", + "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), + "formatter": "request", }, - 'file_verbose': { - 'level': 'WARNING', - 'filters': ['require_debug_false'], - 'class': 'logging.FileHandler', - 'filename': os.path.join(swh_web_config['log_dir'], 'swh-web.log'), - 'formatter': 'verbose' + "console_verbose": { + "level": "DEBUG", + "filters": ["require_debug_true"], + "class": "logging.StreamHandler", + "formatter": "verbose", }, - 'null': { - 'class': 'logging.NullHandler', + "file_verbose": { + "level": "WARNING", + "filters": ["require_debug_false"], + "class": "logging.FileHandler", + "filename": os.path.join(swh_web_config["log_dir"], "swh-web.log"), + "formatter": "verbose", }, + "null": {"class": "logging.NullHandler",}, }, - 'loggers': { - '': { - 'handlers': ['console_verbose', 'file_verbose'], - 'level': 'DEBUG' if DEBUG else 'WARNING', + "loggers": { + "": { + "handlers": ["console_verbose", "file_verbose"], + "level": "DEBUG" if DEBUG else "WARNING", }, - 'django': { - 'handlers': ['console'], - 'level': 'DEBUG' if DEBUG else 'WARNING', - 'propagate': False, + "django": { + "handlers": ["console"], + "level": "DEBUG" if DEBUG else "WARNING", + "propagate": False, }, - 'django.request': { - 'handlers': ['file_request'], - 'level': 'DEBUG' if DEBUG else 'WARNING', - 'propagate': False, - }, - 'django.db.backends': { - 'handlers': ['null'], - 'propagate': False - }, - 'django.utils.autoreload': { - 'level': 'INFO', + "django.request": { + "handlers": ["file_request"], + "level": "DEBUG" if DEBUG else "WARNING", + "propagate": False, }, + "django.db.backends": {"handlers": ["null"], "propagate": False}, + "django.utils.autoreload": {"level": "INFO",}, }, } WEBPACK_LOADER = { - 'DEFAULT': { - 'CACHE': False, - 'BUNDLE_DIR_NAME': './', - 'STATS_FILE': os.path.join(STATIC_DIR, 'webpack-stats.json'), - 'POLL_INTERVAL': 0.1, - 'TIMEOUT': None, - 'IGNORE': ['.+\\.hot-update.js', '.+\\.map'] + "DEFAULT": { + "CACHE": False, + "BUNDLE_DIR_NAME": "./", + "STATS_FILE": os.path.join(STATIC_DIR, "webpack-stats.json"), + "POLL_INTERVAL": 0.1, + "TIMEOUT": None, + "IGNORE": [".+\\.hot-update.js", ".+\\.map"], } } -LOGIN_URL = '/admin/login/' -LOGIN_REDIRECT_URL = 'admin' +LOGIN_URL = "/admin/login/" +LOGIN_REDIRECT_URL = "admin" -SESSION_ENGINE = 'django.contrib.sessions.backends.cache' +SESSION_ENGINE = "django.contrib.sessions.backends.cache" CACHES = { - 'default': { - 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache' + "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}, + "db_cache": { + "BACKEND": "django.core.cache.backends.db.DatabaseCache", + "LOCATION": "swh_web_cache", }, - 'db_cache': { - 'BACKEND': 'django.core.cache.backends.db.DatabaseCache', - 'LOCATION': 
'swh_web_cache', - } } JS_REVERSE_JS_MINIFY = False CORS_ORIGIN_ALLOW_ALL = True -CORS_URLS_REGEX = r'^/badge/.*$' +CORS_URLS_REGEX = r"^/badge/.*$" AUTHENTICATION_BACKENDS = [ - 'django.contrib.auth.backends.ModelBackend', - 'swh.web.auth.backends.OIDCAuthorizationCodePKCEBackend', + "django.contrib.auth.backends.ModelBackend", + "swh.web.auth.backends.OIDCAuthorizationCodePKCEBackend", ] diff --git a/swh/web/settings/development.py b/swh/web/settings/development.py --- a/swh/web/settings/development.py +++ b/swh/web/settings/development.py @@ -9,10 +9,10 @@ from django.core.cache import cache -from .common import * # noqa +from .common import * # noqa from .common import MIDDLEWARE -MIDDLEWARE += ['swh.web.common.middlewares.HtmlPrettifyMiddleware'] +MIDDLEWARE += ["swh.web.common.middlewares.HtmlPrettifyMiddleware"] AUTH_PASSWORD_VALIDATORS = [] # disable any pwd validation mechanism diff --git a/swh/web/settings/production.py b/swh/web/settings/production.py --- a/swh/web/settings/production.py +++ b/swh/web/settings/production.py @@ -7,40 +7,44 @@ Django production settings for swh-web. """ -from .common import * # noqa +from .common import * # noqa from .common import MIDDLEWARE, CACHES, WEBPACK_LOADER from .common import swh_web_config from .common import REST_FRAMEWORK # activate per-site caching -if 'GZip' in MIDDLEWARE[0]: - MIDDLEWARE.insert(1, 'django.middleware.cache.UpdateCacheMiddleware') +if "GZip" in MIDDLEWARE[0]: + MIDDLEWARE.insert(1, "django.middleware.cache.UpdateCacheMiddleware") else: - MIDDLEWARE.insert(0, 'django.middleware.cache.UpdateCacheMiddleware') - -MIDDLEWARE += ['swh.web.common.middlewares.HtmlMinifyMiddleware', - 'django.middleware.cache.FetchFromCacheMiddleware'] - -if swh_web_config.get('throttling', {}).get('cache_uri'): - CACHES.update({ - 'default': { - 'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache', - 'LOCATION': swh_web_config['throttling']['cache_uri'], + MIDDLEWARE.insert(0, "django.middleware.cache.UpdateCacheMiddleware") + +MIDDLEWARE += [ + "swh.web.common.middlewares.HtmlMinifyMiddleware", + "django.middleware.cache.FetchFromCacheMiddleware", +] + +if swh_web_config.get("throttling", {}).get("cache_uri"): + CACHES.update( + { + "default": { + "BACKEND": "django.core.cache.backends.memcached.MemcachedCache", + "LOCATION": swh_web_config["throttling"]["cache_uri"], + } } - }) + ) # Setup support for proxy headers USE_X_FORWARDED_HOST = True -SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') +SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") # We're going through seven (or, in that case, 2) proxies thanks to Varnish -REST_FRAMEWORK['NUM_PROXIES'] = 2 +REST_FRAMEWORK["NUM_PROXIES"] = 2 DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': swh_web_config['production_db'], + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": swh_web_config["production_db"], } } -WEBPACK_LOADER['DEFAULT']['CACHE'] = True +WEBPACK_LOADER["DEFAULT"]["CACHE"] = True diff --git a/swh/web/settings/tests.py b/swh/web/settings/tests.py --- a/swh/web/settings/tests.py +++ b/swh/web/settings/tests.py @@ -22,94 +22,86 @@ swh_web_config = get_config() -swh_web_config.update({ - 'debug': False, - 'secret_key': 'test', - 'history_counters_url': '', - 'throttling': { - 'cache_uri': None, - 'scopes': { - 'swh_api': { - 'limiter_rate': { - 'default': '60/min' +swh_web_config.update( + { + "debug": False, + "secret_key": "test", + "history_counters_url": "", + "throttling": { + "cache_uri": None, + 
"scopes": { + "swh_api": { + "limiter_rate": {"default": "60/min"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_api_origin_search': { - 'limiter_rate': { - 'default': '100/min' + "swh_api_origin_search": { + "limiter_rate": {"default": "100/min"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_api_origin_visit_latest': { - 'limiter_rate': { - 'default': '6000/min' + "swh_api_origin_visit_latest": { + "limiter_rate": {"default": "6000/min"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_vault_cooking': { - 'limiter_rate': { - 'default': '120/h', - 'GET': '60/m' + "swh_vault_cooking": { + "limiter_rate": {"default": "120/h", "GET": "60/m"}, + "exempted_networks": ["127.0.0.0/8"], }, - 'exempted_networks': ['127.0.0.0/8'] - }, - 'swh_save_origin': { - 'limiter_rate': { - 'default': '120/h', - 'POST': '%s/h' % save_origin_rate_post, - } - }, - 'scope1': { - 'limiter_rate': { - 'default': '%s/min' % scope1_limiter_rate, - 'POST': '%s/min' % scope1_limiter_rate_post, - } - }, - 'scope2': { - 'limiter_rate': { - 'default': '%s/min' % scope2_limiter_rate, - 'POST': '%s/min' % scope2_limiter_rate_post - } - }, - 'scope3': { - 'limiter_rate': { - 'default': '%s/min' % scope3_limiter_rate, - 'POST': '%s/min' % scope3_limiter_rate_post + "swh_save_origin": { + "limiter_rate": { + "default": "120/h", + "POST": "%s/h" % save_origin_rate_post, + } }, - 'exempted_networks': ['127.0.0.0/8'] - } - } - }, - 'keycloak': { - 'server_url': 'http://localhost:8080/auth', - 'realm_name': 'SoftwareHeritage', - }, -}) + "scope1": { + "limiter_rate": { + "default": "%s/min" % scope1_limiter_rate, + "POST": "%s/min" % scope1_limiter_rate_post, + } + }, + "scope2": { + "limiter_rate": { + "default": "%s/min" % scope2_limiter_rate, + "POST": "%s/min" % scope2_limiter_rate_post, + } + }, + "scope3": { + "limiter_rate": { + "default": "%s/min" % scope3_limiter_rate, + "POST": "%s/min" % scope3_limiter_rate_post, + }, + "exempted_networks": ["127.0.0.0/8"], + }, + }, + }, + "keycloak": { + "server_url": "http://localhost:8080/auth", + "realm_name": "SoftwareHeritage", + }, + } +) -from .common import * # noqa -from .common import ALLOWED_HOSTS, LOGGING # noqa +from .common import * # noqa +from .common import ALLOWED_HOSTS, LOGGING # noqa DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': swh_web_config['test_db'], + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": swh_web_config["test_db"], } } # when not running unit tests, make the webapp fetch data from memory storages -if 'pytest' not in sys.argv[0] and 'PYTEST_XDIST_WORKER' not in os.environ: - swh_web_config.update({ - 'debug': True, - 'e2e_tests_mode': True - }) - from swh.web.tests.data import get_tests_data, override_storages # noqa +if "pytest" not in sys.argv[0] and "PYTEST_XDIST_WORKER" not in os.environ: + swh_web_config.update({"debug": True, "e2e_tests_mode": True}) + from swh.web.tests.data import get_tests_data, override_storages + test_data = get_tests_data() - override_storages(test_data['storage'], test_data['idx_storage'], - test_data['search']) + override_storages( + test_data["storage"], test_data["idx_storage"], test_data["search"] + ) else: - ALLOWED_HOSTS += ['testserver'] + ALLOWED_HOSTS += ["testserver"] # Silent DEBUG output when running unit tests - LOGGING['handlers']['console']['level'] = 'INFO' # type: ignore + LOGGING["handlers"]["console"]["level"] = 
"INFO" # type: ignore diff --git a/swh/web/tests/admin/test_origin_save.py b/swh/web/tests/admin/test_origin_save.py --- a/swh/web/tests/admin/test_origin_save.py +++ b/swh/web/tests/admin/test_origin_save.py @@ -10,21 +10,25 @@ from django.contrib.auth import get_user_model from swh.web.common.models import ( - SaveAuthorizedOrigin, SaveUnauthorizedOrigin, SaveOriginRequest + SaveAuthorizedOrigin, + SaveUnauthorizedOrigin, + SaveOriginRequest, ) from swh.web.common.origin_save import can_save_origin from swh.web.common.models import ( - SAVE_REQUEST_PENDING, SAVE_REQUEST_ACCEPTED, - SAVE_REQUEST_REJECTED, SAVE_TASK_NOT_YET_SCHEDULED + SAVE_REQUEST_PENDING, + SAVE_REQUEST_ACCEPTED, + SAVE_REQUEST_REJECTED, + SAVE_TASK_NOT_YET_SCHEDULED, ) from swh.web.common.utils import reverse -_user_name = 'swh-web-admin' -_user_mail = 'admin@swh-web.org' -_user_password = '..34~pounds~BEAUTY~march~63..' +_user_name = "swh-web-admin" +_user_mail = "admin@swh-web.org" +_user_password = "..34~pounds~BEAUTY~march~63.." -_authorized_origin_url = 'https://scm.ourproject.org/anonscm/' -_unauthorized_origin_url = 'https://www.softwareheritage.org/' +_authorized_origin_url = "https://scm.ourproject.org/anonscm/" +_unauthorized_origin_url = "https://www.softwareheritage.org/" pytestmark = pytest.mark.django_db @@ -41,18 +45,19 @@ def check_not_login(client, url): - login_url = reverse('login', query_params={'next': url}) + login_url = reverse("login", query_params={"next": url}) response = client.post(url) assert response.status_code == 302 assert unquote(response.url) == login_url def test_add_authorized_origin_url(client): - authorized_url = 'https://scm.adullact.net/anonscm/' + authorized_url = "https://scm.adullact.net/anonscm/" assert can_save_origin(authorized_url) == SAVE_REQUEST_PENDING - url = reverse('admin-origin-save-add-authorized-url', - url_args={'origin_url': authorized_url}) + url = reverse( + "admin-origin-save-add-authorized-url", url_args={"origin_url": authorized_url} + ) check_not_login(client, url) @@ -67,8 +72,10 @@ def test_remove_authorized_origin_url(client): assert can_save_origin(_authorized_origin_url) == SAVE_REQUEST_ACCEPTED - url = reverse('admin-origin-save-remove-authorized-url', - url_args={'origin_url': _authorized_origin_url}) + url = reverse( + "admin-origin-save-remove-authorized-url", + url_args={"origin_url": _authorized_origin_url}, + ) check_not_login(client, url) @@ -81,11 +88,13 @@ def test_add_unauthorized_origin_url(client): - unauthorized_url = 'https://www.yahoo./' + unauthorized_url = "https://www.yahoo./" assert can_save_origin(unauthorized_url) == SAVE_REQUEST_PENDING - url = reverse('admin-origin-save-add-unauthorized-url', - url_args={'origin_url': unauthorized_url}) + url = reverse( + "admin-origin-save-add-unauthorized-url", + url_args={"origin_url": unauthorized_url}, + ) check_not_login(client, url) @@ -100,8 +109,10 @@ def test_remove_unauthorized_origin_url(client): assert can_save_origin(_unauthorized_origin_url) == SAVE_REQUEST_REJECTED - url = reverse('admin-origin-save-remove-unauthorized-url', - url_args={'origin_url': _unauthorized_origin_url}) + url = reverse( + "admin-origin-save-remove-unauthorized-url", + url_args={"origin_url": _unauthorized_origin_url}, + ) check_not_login(client, url) @@ -114,37 +125,35 @@ def test_accept_pending_save_request(client, mocker): - mock_scheduler = mocker.patch('swh.web.common.origin_save.scheduler') - visit_type = 'git' - origin_url = 'https://v2.pikacode.com/bthate/botlib.git' - save_request_url = 
reverse('api-1-save-origin', - url_args={'visit_type': visit_type, - 'origin_url': origin_url}) - response = client.post(save_request_url, data={}, - content_type='application/x-www-form-urlencoded') + mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") + visit_type = "git" + origin_url = "https://v2.pikacode.com/bthate/botlib.git" + save_request_url = reverse( + "api-1-save-origin", + url_args={"visit_type": visit_type, "origin_url": origin_url}, + ) + response = client.post( + save_request_url, data={}, content_type="application/x-www-form-urlencoded" + ) assert response.status_code == 200 - assert response.data['save_request_status'] == SAVE_REQUEST_PENDING + assert response.data["save_request_status"] == SAVE_REQUEST_PENDING - accept_request_url = reverse('admin-origin-save-request-accept', - url_args={'visit_type': visit_type, - 'origin_url': origin_url}) + accept_request_url = reverse( + "admin-origin-save-request-accept", + url_args={"visit_type": visit_type, "origin_url": origin_url}, + ) check_not_login(client, accept_request_url) tasks_data = [ { - 'priority': 'high', - 'policy': 'oneshot', - 'type': 'load-git', - 'arguments': { - 'kwargs': { - 'repo_url': origin_url - }, - 'args': [] - }, - 'status': 'next_run_not_scheduled', - 'id': 1, - } + "priority": "high", + "policy": "oneshot", + "type": "load-git", + "arguments": {"kwargs": {"repo_url": origin_url}, "args": []}, + "status": "next_run_not_scheduled", + "id": 1, + } ] mock_scheduler.create_tasks.return_value = tasks_data @@ -156,25 +165,28 @@ response = client.get(save_request_url) assert response.status_code == 200 - assert response.data[0]['save_request_status'] == SAVE_REQUEST_ACCEPTED - assert response.data[0]['save_task_status'] == SAVE_TASK_NOT_YET_SCHEDULED + assert response.data[0]["save_request_status"] == SAVE_REQUEST_ACCEPTED + assert response.data[0]["save_task_status"] == SAVE_TASK_NOT_YET_SCHEDULED def test_reject_pending_save_request(client, mocker): - mock_scheduler = mocker.patch('swh.web.common.origin_save.scheduler') - visit_type = 'git' - origin_url = 'https://wikipedia.com' - save_request_url = reverse('api-1-save-origin', - url_args={'visit_type': visit_type, - 'origin_url': origin_url}) - response = client.post(save_request_url, data={}, - content_type='application/x-www-form-urlencoded') + mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") + visit_type = "git" + origin_url = "https://wikipedia.com" + save_request_url = reverse( + "api-1-save-origin", + url_args={"visit_type": visit_type, "origin_url": origin_url}, + ) + response = client.post( + save_request_url, data={}, content_type="application/x-www-form-urlencoded" + ) assert response.status_code == 200 - assert response.data['save_request_status'] == SAVE_REQUEST_PENDING + assert response.data["save_request_status"] == SAVE_REQUEST_PENDING - reject_request_url = reverse('admin-origin-save-request-reject', - url_args={'visit_type': visit_type, - 'origin_url': origin_url}) + reject_request_url = reverse( + "admin-origin-save-request-reject", + url_args={"visit_type": visit_type, "origin_url": origin_url}, + ) check_not_login(client, reject_request_url) @@ -184,17 +196,12 @@ tasks_data = [ { - 'priority': 'high', - 'policy': 'oneshot', - 'type': 'load-git', - 'arguments': { - 'kwargs': { - 'repo_url': origin_url - }, - 'args': [] - }, - 'status': 'next_run_not_scheduled', - 'id': 1, + "priority": "high", + "policy": "oneshot", + "type": "load-git", + "arguments": {"kwargs": {"repo_url": origin_url}, "args": 
[]}, + "status": "next_run_not_scheduled", + "id": 1, } ] @@ -203,17 +210,20 @@ response = client.get(save_request_url) assert response.status_code == 200 - assert response.data[0]['save_request_status'] == SAVE_REQUEST_REJECTED + assert response.data[0]["save_request_status"] == SAVE_REQUEST_REJECTED def test_remove_save_request(client): - sor = SaveOriginRequest.objects.create(visit_type='git', - origin_url='https://wikipedia.com', # noqa - status=SAVE_REQUEST_PENDING) + sor = SaveOriginRequest.objects.create( + visit_type="git", + origin_url="https://wikipedia.com", + status=SAVE_REQUEST_PENDING, + ) assert SaveOriginRequest.objects.count() == 1 - remove_request_url = reverse('admin-origin-save-request-remove', - url_args={'sor_id': sor.id}) + remove_request_url = reverse( + "admin-origin-save-request-remove", url_args={"sor_id": sor.id} + ) check_not_login(client, remove_request_url) diff --git a/swh/web/tests/api/test_api_lookup.py b/swh/web/tests/api/test_api_lookup.py --- a/swh/web/tests/api/test_api_lookup.py +++ b/swh/web/tests/api/test_api_lookup.py @@ -10,106 +10,114 @@ def test_genericapi_lookup_nothing_is_found(): - def test_generic_lookup_fn(sha1, another_unused_arg): - assert another_unused_arg == 'unused_arg' - assert sha1 == 'sha1' + assert another_unused_arg == "unused_arg" + assert sha1 == "sha1" return None - notfound_msg = 'This will be raised because None is returned.' + notfound_msg = "This will be raised because None is returned." with pytest.raises(NotFoundExc) as e: utils.api_lookup( - test_generic_lookup_fn, 'sha1', 'unused_arg', - notfound_msg=notfound_msg) + test_generic_lookup_fn, "sha1", "unused_arg", notfound_msg=notfound_msg + ) assert e.match(notfound_msg) def test_generic_api_map_are_enriched_and_transformed_to_list(): - def test_generic_lookup_fn_1(criteria0, param0, param1): - assert criteria0 == 'something' + assert criteria0 == "something" return map(lambda x: x + 1, [1, 2, 3]) actual_result = utils.api_lookup( - test_generic_lookup_fn_1, 'something', 'some param 0', - 'some param 1', - notfound_msg=('This is not the error message you are looking for. ' - 'Move along.'), - enrich_fn=lambda x, request: x * 2) + test_generic_lookup_fn_1, + "something", + "some param 0", + "some param 1", + notfound_msg=( + "This is not the error message you are looking for. " "Move along." + ), + enrich_fn=lambda x, request: x * 2, + ) assert actual_result == [4, 6, 8] def test_generic_api_list_are_enriched_too(): - def test_generic_lookup_fn_2(crit): - assert crit == 'something' - return ['a', 'b', 'c'] + assert crit == "something" + return ["a", "b", "c"] actual_result = utils.api_lookup( - test_generic_lookup_fn_2, 'something', - notfound_msg=('Not the error message you are looking for, it is. ' - 'Along, you move!'), - enrich_fn=lambda x, request: ''. join(['=', x, '='])) + test_generic_lookup_fn_2, + "something", + notfound_msg=( + "Not the error message you are looking for, it is. " "Along, you move!" 
+ ), + enrich_fn=lambda x, request: "".join(["=", x, "="]), + ) - assert actual_result == ['=a=', '=b=', '=c='] + assert actual_result == ["=a=", "=b=", "=c="] def test_generic_api_generator_are_enriched_and_returned_as_list(): - def test_generic_lookup_fn_3(crit): - assert crit == 'crit' + assert crit == "crit" return (i for i in [4, 5, 6]) actual_result = utils.api_lookup( - test_generic_lookup_fn_3, 'crit', - notfound_msg='Move!', - enrich_fn=lambda x, request: x - 1) + test_generic_lookup_fn_3, + "crit", + notfound_msg="Move!", + enrich_fn=lambda x, request: x - 1, + ) assert actual_result == [3, 4, 5] def test_generic_api_simple_data_are_enriched_and_returned_too(): - def test_generic_lookup_fn_4(crit): - assert crit == '123' - return {'a': 10} + assert crit == "123" + return {"a": 10} def test_enrich_data(x, request): - x['a'] = x['a'] * 10 + x["a"] = x["a"] * 10 return x actual_result = utils.api_lookup( - test_generic_lookup_fn_4, '123', - notfound_msg='Nothing to do', - enrich_fn=test_enrich_data) + test_generic_lookup_fn_4, + "123", + notfound_msg="Nothing to do", + enrich_fn=test_enrich_data, + ) - assert actual_result == {'a': 100} + assert actual_result == {"a": 100} def test_api_lookup_not_found(): - notfound_msg = 'this is the error message raised as it is None' + notfound_msg = "this is the error message raised as it is None" with pytest.raises(NotFoundExc) as e: - utils.api_lookup( - lambda x: None, 'something', - notfound_msg=notfound_msg) + utils.api_lookup(lambda x: None, "something", notfound_msg=notfound_msg) assert e.match(notfound_msg) def test_api_lookup_with_result(): actual_result = utils.api_lookup( - lambda x: x + '!', 'something', - notfound_msg='this is the error which won\'t be used here') + lambda x: x + "!", + "something", + notfound_msg="this is the error which won't be used here", + ) - assert actual_result == 'something!' + assert actual_result == "something!" 
 def test_api_lookup_with_result_as_map():
     actual_result = utils.api_lookup(
-        lambda x: map(lambda y: y+1, x), [1, 2, 3],
-        notfound_msg='this is the error which won\'t be used here')
+        lambda x: map(lambda y: y + 1, x),
+        [1, 2, 3],
+        notfound_msg="this is the error which won't be used here",
+    )
     assert actual_result == [2, 3, 4]
diff --git a/swh/web/tests/api/test_apidoc.py b/swh/web/tests/api/test_apidoc.py
--- a/swh/web/tests/api/test_apidoc.py
+++ b/swh/web/tests/api/test_apidoc.py
@@ -83,294 +83,276 @@
 def test_apidoc_nodoc_failure():
     with pytest.raises(Exception):
-        @api_doc('/my/nodoc/url/')
+
+        @api_doc("/my/nodoc/url/")
         def apidoc_nodoc_tester(request, arga=0, argb=0):
             return Response(arga + argb)
-@api_route(r'/some/(?P<myarg>[0-9]+)/(?P<myotherarg>[0-9]+)/',
-           'api-1-some-doc-route')
-@api_doc('/some/doc/route/')
+@api_route(r"/some/(?P<myarg>[0-9]+)/(?P<myotherarg>[0-9]+)/", "api-1-some-doc-route")
+@api_doc("/some/doc/route/")
 def apidoc_route(request, myarg, myotherarg, akw=0):
     """
     Sample doc
     """
-    return {'result': int(myarg) + int(myotherarg) + akw}
+    return {"result": int(myarg) + int(myotherarg) + akw}
 def test_apidoc_route_doc(client):
-    url = reverse('api-1-some-doc-route-doc')
-    rv = client.get(url, HTTP_ACCEPT='text/html')
+    url = reverse("api-1-some-doc-route-doc")
+    rv = client.get(url, HTTP_ACCEPT="text/html")
     assert rv.status_code == 200, rv.content
-    assert_template_used(rv, 'api/apidoc.html')
+    assert_template_used(rv, "api/apidoc.html")
 def test_apidoc_route_fn(api_client):
-    url = reverse('api-1-some-doc-route',
-                  url_args={'myarg': 1, 'myotherarg': 1})
+    url = reverse("api-1-some-doc-route", url_args={"myarg": 1, "myotherarg": 1})
     rv = api_client.get(url)
     assert rv.status_code == 200, rv.data
-@api_route(r'/test/error/(?P<exc_name>.+)/', 'api-1-test-error')
-@api_doc('/test/error/')
+@api_route(r"/test/error/(?P<exc_name>.+)/", "api-1-test-error")
+@api_doc("/test/error/")
 def apidoc_test_error_route(request, exc_name):
     """
     Sample doc
     """
     for e in _exception_http_code.keys():
         if e.__name__ == exc_name:
-            raise e('Error')
+            raise e("Error")
 def test_apidoc_error(api_client):
     for exc, code in _exception_http_code.items():
-        url = reverse('api-1-test-error',
-                      url_args={'exc_name': exc.__name__})
+        url = reverse("api-1-test-error", url_args={"exc_name": exc.__name__})
         rv = api_client.get(url)
         assert rv.status_code == code, rv.data
-@api_route(r'/some/full/(?P<myarg>[0-9]+)/(?P<myotherarg>[0-9]+)/',
-           'api-1-some-complete-doc-route')
-@api_doc('/some/complete/doc/route/')
+@api_route(
+    r"/some/full/(?P<myarg>[0-9]+)/(?P<myotherarg>[0-9]+)/",
+    "api-1-some-complete-doc-route",
+)
+@api_doc("/some/complete/doc/route/")
 def apidoc_full_stack(request, myarg, myotherarg, akw=0):
     """
     Sample doc
     """
-    return {'result': int(myarg) + int(myotherarg) + akw}
+    return {"result": int(myarg) + int(myotherarg) + akw}
 def test_apidoc_full_stack_doc(client):
-    url = reverse('api-1-some-complete-doc-route-doc')
-    rv = client.get(url, HTTP_ACCEPT='text/html')
+    url = reverse("api-1-some-complete-doc-route-doc")
+    rv = client.get(url, HTTP_ACCEPT="text/html")
     assert rv.status_code == 200, rv.content
-    assert_template_used(rv, 'api/apidoc.html')
+    assert_template_used(rv, "api/apidoc.html")
 def test_apidoc_full_stack_fn(api_client):
-    url = reverse('api-1-some-complete-doc-route',
-                  url_args={'myarg': 1, 'myotherarg': 1})
+    url = reverse(
+        "api-1-some-complete-doc-route", url_args={"myarg": 1, "myotherarg": 1}
+    )
     rv = api_client.get(url)
     assert rv.status_code == 200, rv.data
-@api_route(r'/test/post/only/', 'api-1-test-post-only',
-           methods=['POST'])
-@api_doc('/test/post/only/') +@api_route(r"/test/post/only/", "api-1-test-post-only", methods=["POST"]) +@api_doc("/test/post/only/") def apidoc_test_post_only(request, exc_name): """ Sample doc """ - return {'result': 'some data'} + return {"result": "some data"} def test_apidoc_post_only(client): # a dedicated view accepting GET requests should have # been created to display the HTML documentation - url = reverse('api-1-test-post-only-doc') - rv = client.get(url, HTTP_ACCEPT='text/html') + url = reverse("api-1-test-post-only-doc") + rv = client.get(url, HTTP_ACCEPT="text/html") assert rv.status_code == 200, rv.content - assert_template_used(rv, 'api/apidoc.html') + assert_template_used(rv, "api/apidoc.html") def test_api_doc_parse_httpdomain(): doc_data = { - 'description': '', - 'urls': [], - 'args': [], - 'params': [], - 'resheaders': [], - 'reqheaders': [], - 'input_type': '', - 'inputs': [], - 'return_type': '', - 'returns': [], - 'status_codes': [], - 'examples': [] + "description": "", + "urls": [], + "args": [], + "params": [], + "resheaders": [], + "reqheaders": [], + "input_type": "", + "inputs": [], + "return_type": "", + "returns": [], + "status_codes": [], + "examples": [], } _parse_httpdomain_doc(_httpdomain_doc, doc_data) - expected_urls = [{ - 'rule': '/api/1/revision/ **\\(sha1_git\\)** /', - 'methods': ['GET', 'HEAD', 'OPTIONS'] - }] + expected_urls = [ + { + "rule": "/api/1/revision/ **\\(sha1_git\\)** /", + "methods": ["GET", "HEAD", "OPTIONS"], + } + ] - assert 'urls' in doc_data - assert doc_data['urls'] == expected_urls + assert "urls" in doc_data + assert doc_data["urls"] == expected_urls - expected_description = ('Get information about a revision in the archive. ' - 'Revisions are identified by **sha1** checksums, ' - 'compatible with Git commit identifiers. See ' - '**swh.model.identifiers.revision_identifier** in ' - 'our data model module for details about how they ' - 'are computed.') + expected_description = ( + "Get information about a revision in the archive. " + "Revisions are identified by **sha1** checksums, " + "compatible with Git commit identifiers. See " + "**swh.model.identifiers.revision_identifier** in " + "our data model module for details about how they " + "are computed." 
+ ) - assert 'description' in doc_data - assert doc_data['description'] == expected_description + assert "description" in doc_data + assert doc_data["description"] == expected_description - expected_args = [{ - 'name': 'sha1_git', - 'type': 'string', - 'doc': ('hexadecimal representation of the revision ' - '**sha1_git** identifier') - }] + expected_args = [ + { + "name": "sha1_git", + "type": "string", + "doc": ( + "hexadecimal representation of the revision " "**sha1_git** identifier" + ), + } + ] - assert 'args' in doc_data - assert doc_data['args'] == expected_args + assert "args" in doc_data + assert doc_data["args"] == expected_args expected_params = [] - assert 'params' in doc_data - assert doc_data['params'] == expected_params + assert "params" in doc_data + assert doc_data["params"] == expected_params - expected_reqheaders = [{ - 'doc': ('the requested response content type, either ' - '``application/json`` (default) or ``application/yaml``'), - 'name': 'Accept' - }] + expected_reqheaders = [ + { + "doc": ( + "the requested response content type, either " + "``application/json`` (default) or ``application/yaml``" + ), + "name": "Accept", + } + ] - assert 'reqheaders' in doc_data - assert doc_data['reqheaders'] == expected_reqheaders + assert "reqheaders" in doc_data + assert doc_data["reqheaders"] == expected_reqheaders - expected_resheaders = [{ - 'doc': 'this depends on **Accept** header of request', - 'name': 'Content-Type' - }] + expected_resheaders = [ + {"doc": "this depends on **Accept** header of request", "name": "Content-Type"} + ] - assert 'resheaders' in doc_data - assert doc_data['resheaders'] == expected_resheaders + assert "resheaders" in doc_data + assert doc_data["resheaders"] == expected_resheaders expected_statuscodes = [ - { - 'code': '200', - 'doc': 'no error' - }, - { - 'code': '400', - 'doc': 'an invalid **sha1_git** value has been provided' - }, - { - 'code': '404', - 'doc': 'requested revision can not be found in the archive' - } + {"code": "200", "doc": "no error"}, + {"code": "400", "doc": "an invalid **sha1_git** value has been provided"}, + {"code": "404", "doc": "requested revision can not be found in the archive"}, ] - assert 'status_codes' in doc_data - assert doc_data['status_codes'] == expected_statuscodes + assert "status_codes" in doc_data + assert doc_data["status_codes"] == expected_statuscodes - expected_input_type = 'object' + expected_input_type = "object" - assert 'input_type' in doc_data - assert doc_data['input_type'] == expected_input_type + assert "input_type" in doc_data + assert doc_data["input_type"] == expected_input_type expected_inputs = [ - { - 'name': 'n', - 'type': 'int', - 'doc': 'sample input integer' - }, - { - 'name': 's', - 'type': 'string', - 'doc': 'sample input string' - }, - { - 'name': 'a', - 'type': 'array', - 'doc': 'sample input array' - }, + {"name": "n", "type": "int", "doc": "sample input integer"}, + {"name": "s", "type": "string", "doc": "sample input string"}, + {"name": "a", "type": "array", "doc": "sample input array"}, ] - assert 'inputs' in doc_data - assert doc_data['inputs'] == expected_inputs + assert "inputs" in doc_data + assert doc_data["inputs"] == expected_inputs - expected_return_type = 'object' + expected_return_type = "object" - assert 'return_type' in doc_data - assert doc_data['return_type'] == expected_return_type + assert "return_type" in doc_data + assert doc_data["return_type"] == expected_return_type expected_returns = [ { - 'name': 'author', - 'type': 'object', - 'doc': 'information 
about the author of the revision'
-        },
-        {
-            'name': 'committer',
-            'type': 'object',
-            'doc': 'information about the committer of the revision'
+            "name": "author",
+            "type": "object",
+            "doc": "information about the author of the revision",
         },
         {
-            'name': 'committer_date',
-            'type': 'string',
-            'doc': 'ISO representation of the commit date (in UTC)'
+            "name": "committer",
+            "type": "object",
+            "doc": "information about the committer of the revision",
         },
         {
-            'name': 'date',
-            'type': 'string',
-            'doc': 'ISO representation of the revision date (in UTC)'
+            "name": "committer_date",
+            "type": "string",
+            "doc": "ISO representation of the commit date (in UTC)",
         },
         {
-            'name': 'directory',
-            'type': 'string',
-            'doc': 'the unique identifier that revision points to'
+            "name": "date",
+            "type": "string",
+            "doc": "ISO representation of the revision date (in UTC)",
         },
         {
-            'name': 'directory_url',
-            'type': 'string',
-            'doc': ('link to `/api/1/directory/ </api/1/directory/doc/>`_ '
-                    'to get information about the directory associated to '
-                    'the revision')
+            "name": "directory",
+            "type": "string",
+            "doc": "the unique identifier that revision points to",
         },
         {
-            'name': 'id',
-            'type': 'string',
-            'doc': 'the revision unique identifier'
+            "name": "directory_url",
+            "type": "string",
+            "doc": (
+                "link to `/api/1/directory/ </api/1/directory/doc/>`_ "
+                "to get information about the directory associated to "
+                "the revision"
+            ),
         },
+        {"name": "id", "type": "string", "doc": "the revision unique identifier"},
         {
-            'name': 'merge',
-            'type': 'boolean',
-            'doc': 'whether or not the revision corresponds to a merge commit'
+            "name": "merge",
+            "type": "boolean",
+            "doc": "whether or not the revision corresponds to a merge commit",
         },
         {
-            'name': 'message',
-            'type': 'string',
-            'doc': 'the message associated to the revision'
+            "name": "message",
+            "type": "string",
+            "doc": "the message associated to the revision",
         },
         {
-            'name': 'parents',
-            'type': 'array',
-            'doc': ('the parents of the revision, i.e. the previous revisions '
-                    'that head directly to it, each entry of that array '
-                    'contains an unique parent revision identifier but also a '
-                    'link to `/api/1/revision/ </api/1/revision/doc/>`_ '
-                    'to get more information about it')
+            "name": "parents",
+            "type": "array",
+            "doc": (
+                "the parents of the revision, i.e. the previous revisions "
+                "that head directly to it, each entry of that array "
+                "contains an unique parent revision identifier but also a "
+                "link to `/api/1/revision/ </api/1/revision/doc/>`_ "
+                "to get more information about it"
+            ),
         },
-        {
-            'name': 'type',
-            'type': 'string',
-            'doc': 'the type of the revision'
-        }
+        {"name": "type", "type": "string", "doc": "the type of the revision"},
     ]
-    assert 'returns' in doc_data
-    assert doc_data['returns'] == expected_returns
+    assert "returns" in doc_data
+    assert doc_data["returns"] == expected_returns
-    expected_examples = [
-        '/api/1/revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/'
-    ]
+    expected_examples = ["/api/1/revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/"]
-    assert 'examples' in doc_data
-    assert doc_data['examples'] == expected_examples
+    assert "examples" in doc_data
+    assert doc_data["examples"] == expected_examples
-@api_route(r'/post/endpoint/', 'api-1-post-endpoint',
-           methods=['POST'])
-@api_doc('/post/endpoint/')
+@api_route(r"/post/endpoint/", "api-1-post-endpoint", methods=["POST"])
+@api_doc("/post/endpoint/")
 def apidoc_test_post_endpoint(request):
     """
     .. http:post:: /api/1/post/endpoint/
@@ -389,63 +371,69 @@
 def test_apidoc_input_output_doc(client):
-    url = reverse('api-1-post-endpoint-doc')
-    rv = client.get(url, HTTP_ACCEPT='text/html')
+    url = reverse("api-1-post-endpoint-doc")
+    rv = client.get(url, HTTP_ACCEPT="text/html")
     assert rv.status_code == 200, rv.content
-    assert_template_used(rv, 'api/apidoc.html')
-
-    input_html_doc = textwrap.indent((
-        '<dl class="row">\n'
-        ' <dt class="col col-md-2 text-right">\n'
-        '  array\n'
-        ' </dt>\n'
-        ' <dd class="col col-md-9">\n'
-        '  <p>\n'
-        '   Input array of pids\n'
-        '  </p>\n'
-        ' </dd>\n'
-        '</dl>\n'
-    ), ' '*7)
-
-    output_html_doc = textwrap.indent((
-        '<dl class="row">\n'
-        ' <dt class="col col-md-2 text-right">\n'
-        '  object\n'
-        ' </dt>\n'
-        ' <dd class="col col-md-9">\n'
-        '  <p>\n'
-        '   an object containing the following keys:\n'
-        '  </p>\n'
-        '  <div class="swh-rst">\n'
-        '   <blockquote>\n'
-        '    <ul>\n'
-        '     <li>\n'
-        '      <p>\n'
-        '       <strong>\n'
-        '        &lt;swh_pid&gt; (object)\n'
-        '       </strong>\n'
-        '       : an object whose keys are input persistent identifiers'
-        ' and values objects with the following keys:\n'
-        '      </p>\n'
-        '      <blockquote>\n'
-        '       <ul class="simple">\n'
-        '        <li>\n'
-        '         <p>\n'
-        '          <strong>\n'
-        '           known (bool)\n'
-        '          </strong>\n'
-        '          : whether the object was found\n'
-        '         </p>\n'
-        '        </li>\n'
-        '       </ul>\n'
-        '      </blockquote>\n'
-        '     </li>\n'
-        '    </ul>\n'
-        '   </blockquote>\n'
-        '  </div>\n'
-        ' </dd>\n'
-        '</dl>\n'
-    ), ' '*7)
+    assert_template_used(rv, "api/apidoc.html")
+
+    input_html_doc = textwrap.indent(
+        (
+            '<dl class="row">\n'
+            ' <dt class="col col-md-2 text-right">\n'
+            "  array\n"
+            " </dt>\n"
+            ' <dd class="col col-md-9">\n'
+            "  <p>\n"
+            "   Input array of pids\n"
+            "  </p>\n"
+            " </dd>\n"
+            "</dl>\n"
+        ),
+        " " * 7,
+    )
+
+    output_html_doc = textwrap.indent(
+        (
+            '<dl class="row">\n'
+            ' <dt class="col col-md-2 text-right">\n'
+            "  object\n"
+            " </dt>\n"
+            ' <dd class="col col-md-9">\n'
+            "  <p>\n"
+            "   an object containing the following keys:\n"
+            "  </p>\n"
+            '  <div class="swh-rst">\n'
+            "   <blockquote>\n"
+            "    <ul>\n"
+            "     <li>\n"
+            "      <p>\n"
+            "       <strong>\n"
+            "        &lt;swh_pid&gt; (object)\n"
+            "       </strong>\n"
+            "       : an object whose keys are input persistent identifiers"
+            " and values objects with the following keys:\n"
+            "      </p>\n"
+            "      <blockquote>\n"
+            '       <ul class="simple">\n'
+            "        <li>\n"
+            "         <p>\n"
+            "          <strong>\n"
+            "           known (bool)\n"
+            "          </strong>\n"
+            "          : whether the object was found\n"
+            "         </p>\n"
+            "        </li>\n"
+            "       </ul>\n"
+            "      </blockquote>\n"
+            "     </li>\n"
+            "    </ul>\n"
+            "   </blockquote>\n"
+            "  </div>\n"
+            " </dd>\n"
+            "</dl>\n"
+        ),
+        " " * 7,
+    )
     html = prettify_html(rv.content)
@@ -453,8 +441,8 @@
     assert output_html_doc in html
-@api_route(r'/endpoint/links/in/doc/', 'api-1-endpoint-links-in-doc')
-@api_doc('/endpoint/links/in/doc/')
+@api_route(r"/endpoint/links/in/doc/", "api-1-endpoint-links-in-doc")
+@api_doc("/endpoint/links/in/doc/")
 def apidoc_test_endpoint_with_links_in_doc(request):
     """
     .. http:get:: /api/1/post/endpoint/
@@ -468,31 +456,40 @@
 def test_apidoc_with_links(client):
-    url = reverse('api-1-endpoint-links-in-doc')
-    rv = client.get(url, HTTP_ACCEPT='text/html')
+    url = reverse("api-1-endpoint-links-in-doc")
+    rv = client.get(url, HTTP_ACCEPT="text/html")
     assert rv.status_code == 200, rv.content
-    assert_template_used(rv, 'api/apidoc.html')
+    assert_template_used(rv, "api/apidoc.html")
     html = prettify_html(rv.content)
-    first_link = textwrap.indent((
-        '<a href="/api/1/content/doc/">\n'
-        ' /api/1/content/\n'
-        '</a>'
-    ), ' '*9)
-
-    second_link = textwrap.indent((
-        '<a href="/api/1/directory/doc/">\n'
-        ' /api/1/directory/\n'
-        '</a>'
-    ), ' '*9)
-
-    third_link = textwrap.indent((
-        '<a href="https://archive.softwareheritage.org">\n'
-        ' archive\n'
-        '</a>'
-    ), ' '*9)
+    first_link = textwrap.indent(
+        (
+            '<a href="/api/1/content/doc/">\n'
+            " /api/1/content/\n"
+            "</a>"
+        ),
+        " " * 9,
+    )
+
+    second_link = textwrap.indent(
+        (
+            '<a href="/api/1/directory/doc/">\n'
+            " /api/1/directory/\n"
+            "</a>"
+        ),
+        " " * 9,
+    )
+
+    third_link = textwrap.indent(
+        (
+            '<a href="https://archive.softwareheritage.org">\n'
+            " archive\n"
+            "</a>"
+        ),
+        " " * 9,
+    )
     assert first_link in html
     assert second_link in html
diff --git a/swh/web/tests/api/test_apiresponse.py b/swh/web/tests/api/test_apiresponse.py
--- a/swh/web/tests/api/test_apiresponse.py
+++ b/swh/web/tests/api/test_apiresponse.py
@@ -6,25 +6,26 @@
 import json
 from swh.web.api.apiresponse import (
-    compute_link_header, transform, make_api_response,
-    filter_by_fields
+    compute_link_header,
+    transform,
+    make_api_response,
+    filter_by_fields,
 )
 def test_compute_link_header():
-    next_link = '/api/endpoint/next'
-    prev_link = '/api/endpoint/prev'
+    next_link = "/api/endpoint/next"
+    prev_link = "/api/endpoint/prev"
     rv = {
-        'headers': {'link-next': next_link, 'link-prev': prev_link},
-        'results': [1, 2, 3]
+        "headers": {"link-next": next_link, "link-prev": prev_link},
+        "results": [1, 2, 3],
     }
     options = {}
     headers = compute_link_header(rv, options)
     assert headers == {
-        'Link': (f'<{next_link}>; rel="next",' f'<{prev_link}>; rel="previous"')
+        "Link": (f'<{next_link}>; rel="next",' f'<{prev_link}>; rel="previous"')
     }
@@ -38,7 +39,7 @@
 def test_compute_link_header_nothing_changed_2():
-    rv = {'headers': {}}
+    rv = {"headers": {}}
     options = {}
     headers = compute_link_header(rv, options)
@@ -47,58 +48,55 @@
 def test_transform_only_return_results_1():
-    rv = {'results': {'some-key': 'some-value'}}
-    assert transform(rv) == {'some-key': 'some-value'}
+    rv = {"results": {"some-key": "some-value"}}
+    assert transform(rv) == {"some-key": "some-value"}
 def test_transform_only_return_results_2():
-    rv = {'headers': {'something': 'do changes'},
-          'results': {'some-key': 'some-value'}}
-    assert transform(rv) == {'some-key': 'some-value'}
+    rv = {"headers": {"something": "do changes"}, "results": {"some-key": "some-value"}}
+    assert transform(rv) == {"some-key": "some-value"}
 def test_transform_do_remove_headers():
-    rv = {'headers': {'something': 'do changes'},
-          'some-key': 'some-value'}
-    assert transform(rv) == {'some-key': 'some-value'}
+    rv = {"headers": {"something": "do changes"}, "some-key": "some-value"}
+    assert transform(rv) == {"some-key": "some-value"}
 def test_transform_do_nothing():
-    rv = {'some-key': 'some-value'}
-    assert transform(rv) == {'some-key': 'some-value'}
+
rv = {"some-key": "some-value"} + assert transform(rv) == {"some-key": "some-value"} def test_swh_multi_response_mimetype(mocker, api_request_factory): - mock_shorten_path = mocker.patch('swh.web.api.apiresponse.shorten_path') - mock_filter = mocker.patch('swh.web.api.apiresponse.filter_by_fields') - mock_json = mocker.patch('swh.web.api.apiresponse.json') + mock_shorten_path = mocker.patch("swh.web.api.apiresponse.shorten_path") + mock_filter = mocker.patch("swh.web.api.apiresponse.filter_by_fields") + mock_json = mocker.patch("swh.web.api.apiresponse.json") - data = { - 'data': [12, 34], - 'id': 'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc' - } + data = {"data": [12, 34], "id": "adc83b19e793491b1c6ea0fd8b46cd9f32e592fc"} mock_filter.return_value = data - mock_shorten_path.return_value = 'my_short_path' + mock_shorten_path.return_value = "my_short_path" - accepted_response_formats = {'html': 'text/html', - 'yaml': 'application/yaml', - 'json': 'application/json'} + accepted_response_formats = { + "html": "text/html", + "yaml": "application/yaml", + "json": "application/json", + } for format in accepted_response_formats: - request = api_request_factory.get('/api/test/path/') + request = api_request_factory.get("/api/test/path/") mime_type = accepted_response_formats[format] - setattr(request, 'accepted_media_type', mime_type) + setattr(request, "accepted_media_type", mime_type) - if mime_type == 'text/html': + if mime_type == "text/html": expected_data = { - 'response_data': json.dumps(data), - 'headers_data': {}, - 'heading': 'my_short_path', - 'status_code': 200 + "response_data": json.dumps(data), + "headers_data": {}, + "heading": "my_short_path", + "status_code": 200, } mock_json.dumps.return_value = json.dumps(data) @@ -111,15 +109,15 @@ assert rv.status_code == 200, rv.data assert rv.data == expected_data - if mime_type == 'text/html': - assert rv.template_name == 'api/apidoc.html' + if mime_type == "text/html": + assert rv.template_name == "api/apidoc.html" def test_swh_filter_renderer_do_nothing(api_request_factory): - input_data = {'a': 'some-data'} + input_data = {"a": "some-data"} - request = api_request_factory.get('/api/test/path/', data={}) - setattr(request, 'query_params', request.GET) + request = api_request_factory.get("/api/test/path/", data={}) + setattr(request, "query_params", request.GET) actual_data = filter_by_fields(request, input_data) @@ -127,18 +125,16 @@ def test_swh_filter_renderer_do_filter(mocker, api_request_factory): - mock_ffk = mocker.patch('swh.web.api.apiresponse.utils.filter_field_keys') - mock_ffk.return_value = {'a': 'some-data'} + mock_ffk = mocker.patch("swh.web.api.apiresponse.utils.filter_field_keys") + mock_ffk.return_value = {"a": "some-data"} - request = api_request_factory.get('/api/test/path/', - data={'fields': 'a,c'}) - setattr(request, 'query_params', request.GET) + request = api_request_factory.get("/api/test/path/", data={"fields": "a,c"}) + setattr(request, "query_params", request.GET) - input_data = {'a': 'some-data', - 'b': 'some-other-data'} + input_data = {"a": "some-data", "b": "some-other-data"} actual_data = filter_by_fields(request, input_data) - assert actual_data == {'a': 'some-data'} + assert actual_data == {"a": "some-data"} - mock_ffk.assert_called_once_with(input_data, {'a', 'c'}) + mock_ffk.assert_called_once_with(input_data, {"a", "c"}) diff --git a/swh/web/tests/api/test_throttling.py b/swh/web/tests/api/test_throttling.py --- a/swh/web/tests/api/test_throttling.py +++ b/swh/web/tests/api/test_throttling.py @@ -15,72 
+15,74 @@ from swh.web.api.throttling import SwhWebRateThrottle, throttle_scope from swh.web.settings.tests import ( - scope1_limiter_rate, scope1_limiter_rate_post, - scope2_limiter_rate, scope2_limiter_rate_post, - scope3_limiter_rate, scope3_limiter_rate_post + scope1_limiter_rate, + scope1_limiter_rate_post, + scope2_limiter_rate, + scope2_limiter_rate_post, + scope3_limiter_rate, + scope3_limiter_rate_post, ) from swh.web.urls import urlpatterns class MockViewScope1(APIView): throttle_classes = (SwhWebRateThrottle,) - throttle_scope = 'scope1' + throttle_scope = "scope1" def get(self, request): - return Response('foo_get') + return Response("foo_get") def post(self, request): - return Response('foo_post') + return Response("foo_post") -@api_view(['GET', 'POST']) -@throttle_scope('scope2') +@api_view(["GET", "POST"]) +@throttle_scope("scope2") def mock_view_scope2(request): - if request.method == 'GET': - return Response('bar_get') - elif request.method == 'POST': - return Response('bar_post') + if request.method == "GET": + return Response("bar_get") + elif request.method == "POST": + return Response("bar_post") class MockViewScope3(APIView): throttle_classes = (SwhWebRateThrottle,) - throttle_scope = 'scope3' + throttle_scope = "scope3" def get(self, request): - return Response('foo_get') + return Response("foo_get") def post(self, request): - return Response('foo_post') + return Response("foo_post") -@api_view(['GET', 'POST']) -@throttle_scope('scope3') +@api_view(["GET", "POST"]) +@throttle_scope("scope3") def mock_view_scope3(request): - if request.method == 'GET': - return Response('bar_get') - elif request.method == 'POST': - return Response('bar_post') + if request.method == "GET": + return Response("bar_get") + elif request.method == "POST": + return Response("bar_post") urlpatterns += [ - url(r'^scope1_class$', MockViewScope1.as_view()), - url(r'^scope2_func$', mock_view_scope2), - url(r'^scope3_class$', MockViewScope3.as_view()), - url(r'^scope3_func$', mock_view_scope3) + url(r"^scope1_class$", MockViewScope1.as_view()), + url(r"^scope2_func$", mock_view_scope2), + url(r"^scope3_class$", MockViewScope3.as_view()), + url(r"^scope3_func$", mock_view_scope3), ] -def check_response(response, status_code, - limit=None, remaining=None): +def check_response(response, status_code, limit=None, remaining=None): assert response.status_code == status_code if limit is not None: - assert response['X-RateLimit-Limit'] == str(limit) + assert response["X-RateLimit-Limit"] == str(limit) else: - assert 'X-RateLimit-Limit' not in response + assert "X-RateLimit-Limit" not in response if remaining is not None: - assert response['X-RateLimit-Remaining'] == str(remaining) + assert response["X-RateLimit-Remaining"] == str(remaining) else: - assert 'X-RateLimit-Remaining' not in response + assert "X-RateLimit-Remaining" not in response @override_settings(ROOT_URLCONF=__name__) @@ -89,19 +91,19 @@ Ensure request rate is limited in scope1 """ for i in range(scope1_limiter_rate): - response = api_client.get('/scope1_class') - check_response(response, 200, scope1_limiter_rate, - scope1_limiter_rate - i - 1) + response = api_client.get("/scope1_class") + check_response(response, 200, scope1_limiter_rate, scope1_limiter_rate - i - 1) - response = api_client.get('/scope1_class') + response = api_client.get("/scope1_class") check_response(response, 429, scope1_limiter_rate, 0) for i in range(scope1_limiter_rate_post): - response = api_client.post('/scope1_class') - check_response(response, 200, 
scope1_limiter_rate_post, - scope1_limiter_rate_post - i - 1) + response = api_client.post("/scope1_class") + check_response( + response, 200, scope1_limiter_rate_post, scope1_limiter_rate_post - i - 1 + ) - response = api_client.post('/scope1_class') + response = api_client.post("/scope1_class") check_response(response, 429, scope1_limiter_rate_post, 0) @@ -111,19 +113,19 @@ Ensure request rate is limited in scope2 """ for i in range(scope2_limiter_rate): - response = api_client.get('/scope2_func') - check_response(response, 200, scope2_limiter_rate, - scope2_limiter_rate - i - 1) + response = api_client.get("/scope2_func") + check_response(response, 200, scope2_limiter_rate, scope2_limiter_rate - i - 1) - response = api_client.get('/scope2_func') + response = api_client.get("/scope2_func") check_response(response, 429, scope2_limiter_rate, 0) for i in range(scope2_limiter_rate_post): - response = api_client.post('/scope2_func') - check_response(response, 200, scope2_limiter_rate_post, - scope2_limiter_rate_post - i - 1) + response = api_client.post("/scope2_func") + check_response( + response, 200, scope2_limiter_rate_post, scope2_limiter_rate_post - i - 1 + ) - response = api_client.post('/scope2_func') + response = api_client.post("/scope2_func") check_response(response, 429, scope2_limiter_rate_post, 0) @@ -133,20 +135,20 @@ Ensure request rate is not limited in scope3 as requests coming from localhost are exempted from rate limit. """ - for _ in range(scope3_limiter_rate+1): - response = api_client.get('/scope3_class') + for _ in range(scope3_limiter_rate + 1): + response = api_client.get("/scope3_class") check_response(response, 200) - for _ in range(scope3_limiter_rate_post+1): - response = api_client.post('/scope3_class') + for _ in range(scope3_limiter_rate_post + 1): + response = api_client.post("/scope3_class") check_response(response, 200) - for _ in range(scope3_limiter_rate+1): - response = api_client.get('/scope3_func') + for _ in range(scope3_limiter_rate + 1): + response = api_client.get("/scope3_func") check_response(response, 200) - for _ in range(scope3_limiter_rate_post+1): - response = api_client.post('/scope3_func') + for _ in range(scope3_limiter_rate_post + 1): + response = api_client.post("/scope3_func") check_response(response, 200) @@ -154,39 +156,39 @@ @pytest.mark.django_db def test_staff_users_are_not_rate_limited(api_client): staff_user = User.objects.create_user( - username='johndoe', password='', is_staff=True) + username="johndoe", password="", is_staff=True + ) api_client.force_login(staff_user) - for _ in range(scope2_limiter_rate+1): - response = api_client.get('/scope2_func') + for _ in range(scope2_limiter_rate + 1): + response = api_client.get("/scope2_func") check_response(response, 200) - for _ in range(scope2_limiter_rate_post+1): - response = api_client.post('/scope2_func') + for _ in range(scope2_limiter_rate_post + 1): + response = api_client.post("/scope2_func") check_response(response, 200) @override_settings(ROOT_URLCONF=__name__) @pytest.mark.django_db def test_non_staff_users_are_rate_limited(api_client): - user = User.objects.create_user( - username='johndoe', password='', is_staff=False) + user = User.objects.create_user(username="johndoe", password="", is_staff=False) api_client.force_login(user) for i in range(scope2_limiter_rate): - response = api_client.get('/scope2_func') - check_response(response, 200, scope2_limiter_rate, - scope2_limiter_rate - i - 1) + response = api_client.get("/scope2_func") + check_response(response, 200, 
scope2_limiter_rate, scope2_limiter_rate - i - 1) - response = api_client.get('/scope2_func') + response = api_client.get("/scope2_func") check_response(response, 429, scope2_limiter_rate, 0) for i in range(scope2_limiter_rate_post): - response = api_client.post('/scope2_func') - check_response(response, 200, scope2_limiter_rate_post, - scope2_limiter_rate_post - i - 1) + response = api_client.post("/scope2_func") + check_response( + response, 200, scope2_limiter_rate_post, scope2_limiter_rate_post - i - 1 + ) - response = api_client.post('/scope2_func') + response = api_client.post("/scope2_func") check_response(response, 429, scope2_limiter_rate_post, 0) diff --git a/swh/web/tests/api/test_utils.py b/swh/web/tests/api/test_utils.py --- a/swh/web/tests/api/test_utils.py +++ b/swh/web/tests/api/test_utils.py @@ -13,93 +13,94 @@ from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import reverse, resolve_branch_alias from swh.web.tests.strategies import ( - release, directory, content, revision, snapshot, origin + release, + directory, + content, + revision, + snapshot, + origin, ) url_map = [ { - 'rule': '/other/', - 'methods': set(['GET', 'POST', 'HEAD']), - 'endpoint': 'foo' + "rule": "/other/", + "methods": set(["GET", "POST", "HEAD"]), + "endpoint": "foo", }, { - 'rule': '/some/old/url/', - 'methods': set(['GET', 'POST']), - 'endpoint': 'blablafn' + "rule": "/some/old/url/", + "methods": set(["GET", "POST"]), + "endpoint": "blablafn", }, { - 'rule': '/other/old/url/', - 'methods': set(['GET', 'HEAD']), - 'endpoint': 'bar' + "rule": "/other/old/url/", + "methods": set(["GET", "HEAD"]), + "endpoint": "bar", }, - { - 'rule': '/other', - 'methods': set([]), - 'endpoint': None - }, - { - 'rule': '/other2', - 'methods': set([]), - 'endpoint': None - } + {"rule": "/other", "methods": set([]), "endpoint": None}, + {"rule": "/other2", "methods": set([]), "endpoint": None}, ] def test_filter_field_keys_dict_unknown_keys(): actual_res = utils.filter_field_keys( - {'directory': 1, 'file': 2, 'link': 3}, - {'directory1', 'file2'}) + {"directory": 1, "file": 2, "link": 3}, {"directory1", "file2"} + ) assert actual_res == {} def test_filter_field_keys_dict(): actual_res = utils.filter_field_keys( - {'directory': 1, 'file': 2, 'link': 3}, - {'directory', 'link'}) + {"directory": 1, "file": 2, "link": 3}, {"directory", "link"} + ) - assert actual_res == {'directory': 1, 'link': 3} + assert actual_res == {"directory": 1, "link": 3} def test_filter_field_keys_list_unknown_keys(): actual_res = utils.filter_field_keys( - [{'directory': 1, 'file': 2, 'link': 3}, - {'1': 1, '2': 2, 'link': 3}], {'d'}) + [{"directory": 1, "file": 2, "link": 3}, {"1": 1, "2": 2, "link": 3}], {"d"} + ) assert actual_res == [{}, {}] def test_filter_field_keys_map(): actual_res = utils.filter_field_keys( - map(lambda x: {'i': x['i']+1, 'j': x['j']}, - [{'i': 1, 'j': None}, - {'i': 2, 'j': None}, - {'i': 3, 'j': None}]), {'i'}) + map( + lambda x: {"i": x["i"] + 1, "j": x["j"]}, + [{"i": 1, "j": None}, {"i": 2, "j": None}, {"i": 3, "j": None}], + ), + {"i"}, + ) - assert list(actual_res) == [{'i': 2}, {'i': 3}, {'i': 4}] + assert list(actual_res) == [{"i": 2}, {"i": 3}, {"i": 4}] def test_filter_field_keys_list(): actual_res = utils.filter_field_keys( - [{'directory': 1, 'file': 2, 'link': 3}, - {'dir': 1, 'fil': 2, 'lin': 3}], - {'directory', 'dir'}) + [{"directory": 1, "file": 2, "link": 3}, {"dir": 1, "fil": 2, "lin": 3}], + {"directory", "dir"}, + ) - assert actual_res == [{'directory': 1}, 
{'dir': 1}]
+    assert actual_res == [{"directory": 1}, {"dir": 1}]
 def test_filter_field_keys_other():
     input_set = {1, 2}
-    actual_res = utils.filter_field_keys(input_set, {'a', '1'})
+    actual_res = utils.filter_field_keys(input_set, {"a", "1"})
     assert actual_res == input_set
 def test_person_to_string():
-    assert utils.person_to_string({'name': 'raboof',
-                                   'email': 'foo@bar'}) == 'raboof <foo@bar>'
+    assert (
+        utils.person_to_string({"name": "raboof", "email": "foo@bar"})
+        == "raboof <foo@bar>"
+    )
 def test_enrich_release_empty():
@@ -109,125 +110,123 @@
 @given(release())
-def test_enrich_release_content_target(api_request_factory,
-                                       archive_data, release):
+def test_enrich_release_content_target(api_request_factory, archive_data, release):
     release_data = archive_data.release_get(release)
-    release_data['target_type'] = 'content'
+    release_data["target_type"] = "content"
-    url = reverse('api-1-release', url_args={'sha1_git': release})
+    url = reverse("api-1-release", url_args={"sha1_git": release})
     request = api_request_factory.get(url)
     actual_release = utils.enrich_release(release_data, request)
-    release_data['target_url'] = reverse(
-        'api-1-content',
-        url_args={'q': f'sha1_git:{release_data["target"]}'},
-        request=request)
+    release_data["target_url"] = reverse(
+        "api-1-content",
+        url_args={"q": f'sha1_git:{release_data["target"]}'},
+        request=request,
+    )
     assert actual_release == release_data
 @given(release())
-def test_enrich_release_directory_target(api_request_factory,
-                                         archive_data, release):
+def test_enrich_release_directory_target(api_request_factory, archive_data, release):
     release_data = archive_data.release_get(release)
-    release_data['target_type'] = 'directory'
+    release_data["target_type"] = "directory"
-    url = reverse('api-1-release', url_args={'sha1_git': release})
+    url = reverse("api-1-release", url_args={"sha1_git": release})
     request = api_request_factory.get(url)
     actual_release = utils.enrich_release(release_data, request)
-    release_data['target_url'] = reverse(
-        'api-1-directory',
-        url_args={'sha1_git': release_data['target']},
-        request=request)
+    release_data["target_url"] = reverse(
+        "api-1-directory",
+        url_args={"sha1_git": release_data["target"]},
+        request=request,
+    )
     assert actual_release == release_data
 @given(release())
-def test_enrich_release_revision_target(api_request_factory,
-                                        archive_data, release):
+def test_enrich_release_revision_target(api_request_factory, archive_data, release):
     release_data = archive_data.release_get(release)
-    release_data['target_type'] = 'revision'
+    release_data["target_type"] = "revision"
-    url = reverse('api-1-release', url_args={'sha1_git': release})
+    url = reverse("api-1-release", url_args={"sha1_git": release})
     request = api_request_factory.get(url)
     actual_release = utils.enrich_release(release_data, request)
-    release_data['target_url'] = reverse(
-        'api-1-revision',
-        url_args={'sha1_git': release_data['target']},
-        request=request)
+    release_data["target_url"] = reverse(
+        "api-1-revision", url_args={"sha1_git": release_data["target"]}, request=request
+    )
     assert actual_release == release_data
 @given(release())
-def test_enrich_release_release_target(api_request_factory,
-                                       archive_data, release):
+def test_enrich_release_release_target(api_request_factory, archive_data, release):
     release_data = archive_data.release_get(release)
-    release_data['target_type'] = 'release'
+    release_data["target_type"] = "release"
-    url = reverse('api-1-release', url_args={'sha1_git': release})
+    url = reverse("api-1-release",
url_args={"sha1_git": release}) request = api_request_factory.get(url) actual_release = utils.enrich_release(release_data, request) - release_data['target_url'] = reverse( - 'api-1-release', - url_args={'sha1_git': release_data['target']}, - request=request) + release_data["target_url"] = reverse( + "api-1-release", url_args={"sha1_git": release_data["target"]}, request=request + ) assert actual_release == release_data def test_enrich_directory_no_type(): - assert utils.enrich_directory({'id': 'dir-id'}) == {'id': 'dir-id'} + assert utils.enrich_directory({"id": "dir-id"}) == {"id": "dir-id"} @given(directory()) -def test_enrich_directory_with_type(api_request_factory, - archive_data, directory): +def test_enrich_directory_with_type(api_request_factory, archive_data, directory): dir_content = archive_data.directory_ls(directory) dir_entry = random.choice(dir_content) - url = reverse('api-1-directory', url_args={'sha1_git': directory}) + url = reverse("api-1-directory", url_args={"sha1_git": directory}) request = api_request_factory.get(url) actual_directory = utils.enrich_directory(dir_entry, request) - if dir_entry['type'] == 'file': - dir_entry['target_url'] = reverse( - 'api-1-content', - url_args={'q': f'sha1_git:{dir_entry["target"]}'}, - request=request) - - elif dir_entry['type'] == 'dir': - dir_entry['target_url'] = reverse( - 'api-1-directory', - url_args={'sha1_git': dir_entry['target']}, - request=request) - - elif dir_entry['type'] == 'rev': - dir_entry['target_url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': dir_entry['target']}, - request=request) + if dir_entry["type"] == "file": + dir_entry["target_url"] = reverse( + "api-1-content", + url_args={"q": f'sha1_git:{dir_entry["target"]}'}, + request=request, + ) + + elif dir_entry["type"] == "dir": + dir_entry["target_url"] = reverse( + "api-1-directory", + url_args={"sha1_git": dir_entry["target"]}, + request=request, + ) + + elif dir_entry["type"] == "rev": + dir_entry["target_url"] = reverse( + "api-1-revision", + url_args={"sha1_git": dir_entry["target"]}, + request=request, + ) assert actual_directory == dir_entry def test_enrich_content_without_hashes(): - assert utils.enrich_content({'id': '123'}) == {'id': '123'} + assert utils.enrich_content({"id": "123"}) == {"id": "123"} @given(content()) @@ -237,288 +236,285 @@ content_data = dict(content) - query_string = '%s:%s' % (algo, content_data[algo]) + query_string = "%s:%s" % (algo, content_data[algo]) - url = reverse('api-1-content', url_args={'q': query_string}) + url = reverse("api-1-content", url_args={"q": query_string}) request = api_request_factory.get(url) - enriched_content = utils.enrich_content(content_data, - query_string=query_string, - request=request) + enriched_content = utils.enrich_content( + content_data, query_string=query_string, request=request + ) - content_data['data_url'] = reverse('api-1-content-raw', - url_args={'q': query_string}, - request=request) + content_data["data_url"] = reverse( + "api-1-content-raw", url_args={"q": query_string}, request=request + ) - content_data['filetype_url'] = reverse('api-1-content-filetype', - url_args={'q': query_string}, - request=request) + content_data["filetype_url"] = reverse( + "api-1-content-filetype", url_args={"q": query_string}, request=request + ) - content_data['language_url'] = reverse('api-1-content-language', - url_args={'q': query_string}, - request=request) + content_data["language_url"] = reverse( + "api-1-content-language", url_args={"q": query_string}, request=request + ) - 
content_data['license_url'] = reverse('api-1-content-license', - url_args={'q': query_string}, - request=request) + content_data["license_url"] = reverse( + "api-1-content-license", url_args={"q": query_string}, request=request + ) assert enriched_content == content_data @given(content()) -def test_enrich_content_with_hashes_and_top_level_url(api_request_factory, - content): +def test_enrich_content_with_hashes_and_top_level_url(api_request_factory, content): for algo in DEFAULT_ALGORITHMS: content_data = dict(content) - query_string = '%s:%s' % (algo, content_data[algo]) + query_string = "%s:%s" % (algo, content_data[algo]) - url = reverse('api-1-content', url_args={'q': query_string}) + url = reverse("api-1-content", url_args={"q": query_string}) request = api_request_factory.get(url) - enriched_content = utils.enrich_content(content_data, - query_string=query_string, - top_url=True, - request=request) + enriched_content = utils.enrich_content( + content_data, query_string=query_string, top_url=True, request=request + ) - content_data['content_url'] = reverse('api-1-content', - url_args={'q': query_string}, - request=request) + content_data["content_url"] = reverse( + "api-1-content", url_args={"q": query_string}, request=request + ) - content_data['data_url'] = reverse('api-1-content-raw', - url_args={'q': query_string}, - request=request) + content_data["data_url"] = reverse( + "api-1-content-raw", url_args={"q": query_string}, request=request + ) - content_data['filetype_url'] = reverse('api-1-content-filetype', - url_args={'q': query_string}, - request=request) + content_data["filetype_url"] = reverse( + "api-1-content-filetype", url_args={"q": query_string}, request=request + ) - content_data['language_url'] = reverse('api-1-content-language', - url_args={'q': query_string}, - request=request) + content_data["language_url"] = reverse( + "api-1-content-language", url_args={"q": query_string}, request=request + ) - content_data['license_url'] = reverse('api-1-content-license', - url_args={'q': query_string}, - request=request) + content_data["license_url"] = reverse( + "api-1-content-license", url_args={"q": query_string}, request=request + ) assert enriched_content == content_data @given(revision()) -def test_enrich_revision_without_children_or_parent(api_request_factory, - archive_data, revision): +def test_enrich_revision_without_children_or_parent( + api_request_factory, archive_data, revision +): revision_data = archive_data.revision_get(revision) - del revision_data['parents'] + del revision_data["parents"] - url = reverse('api-1-revision', url_args={'sha1_git': revision}) + url = reverse("api-1-revision", url_args={"sha1_git": revision}) request = api_request_factory.get(url) actual_revision = utils.enrich_revision(revision_data, request) - revision_data['url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': revision}, - request=request) + revision_data["url"] = reverse( + "api-1-revision", url_args={"sha1_git": revision}, request=request + ) - revision_data['history_url'] = reverse( - 'api-1-revision-log', - url_args={'sha1_git': revision}, - request=request) + revision_data["history_url"] = reverse( + "api-1-revision-log", url_args={"sha1_git": revision}, request=request + ) - revision_data['directory_url'] = reverse( - 'api-1-directory', - url_args={'sha1_git': revision_data['directory']}, - request=request) + revision_data["directory_url"] = reverse( + "api-1-directory", + url_args={"sha1_git": revision_data["directory"]}, + request=request, + ) assert 
actual_revision == revision_data @given(revision(), revision(), revision()) -def test_enrich_revision_with_children_and_parent_no_dir(api_request_factory, - archive_data, - revision, - parent_revision, - child_revision): +def test_enrich_revision_with_children_and_parent_no_dir( + api_request_factory, archive_data, revision, parent_revision, child_revision +): revision_data = archive_data.revision_get(revision) - del revision_data['directory'] - revision_data['parents'].append(parent_revision) - revision_data['children'] = child_revision + del revision_data["directory"] + revision_data["parents"].append(parent_revision) + revision_data["children"] = child_revision - url = reverse('api-1-revision', url_args={'sha1_git': revision}) + url = reverse("api-1-revision", url_args={"sha1_git": revision}) request = api_request_factory.get(url) actual_revision = utils.enrich_revision(revision_data, request) - revision_data['url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': revision}, - request=request) - - revision_data['history_url'] = reverse( - 'api-1-revision-log', - url_args={'sha1_git': revision}, - request=request) - - revision_data['parents'] = [ - {'id': p['id'], 'url': reverse('api-1-revision', - url_args={'sha1_git': p['id']}, - request=request)} - for p in revision_data['parents'] + revision_data["url"] = reverse( + "api-1-revision", url_args={"sha1_git": revision}, request=request + ) + + revision_data["history_url"] = reverse( + "api-1-revision-log", url_args={"sha1_git": revision}, request=request + ) + + revision_data["parents"] = [ + { + "id": p["id"], + "url": reverse( + "api-1-revision", url_args={"sha1_git": p["id"]}, request=request + ), + } + for p in revision_data["parents"] ] - revision_data['children_urls'] = [ - reverse('api-1-revision', - url_args={'sha1_git': child_revision}, - request=request) + revision_data["children_urls"] = [ + reverse( + "api-1-revision", url_args={"sha1_git": child_revision}, request=request + ) ] assert actual_revision == revision_data @given(revision(), revision(), revision()) -def test_enrich_revision_no_context(api_request_factory, - revision, - parent_revision, - child_revision): +def test_enrich_revision_no_context( + api_request_factory, revision, parent_revision, child_revision +): revision_data = { - 'id': revision, - 'parents': [parent_revision], - 'children': [child_revision] + "id": revision, + "parents": [parent_revision], + "children": [child_revision], } - url = reverse('api-1-revision', url_args={'sha1_git': revision}) + url = reverse("api-1-revision", url_args={"sha1_git": revision}) request = api_request_factory.get(url) actual_revision = utils.enrich_revision(revision_data, request) - revision_data['url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': revision}, - request=request) - - revision_data['history_url'] = reverse( - 'api-1-revision-log', - url_args={'sha1_git': revision}, - request=request) - - revision_data['parents'] = [{ - 'id': parent_revision, - 'url': reverse('api-1-revision', - url_args={'sha1_git': parent_revision}, - request=request) - }] - - revision_data['children_urls'] = [ - reverse('api-1-revision', - url_args={'sha1_git': child_revision}, - request=request) + revision_data["url"] = reverse( + "api-1-revision", url_args={"sha1_git": revision}, request=request + ) + + revision_data["history_url"] = reverse( + "api-1-revision-log", url_args={"sha1_git": revision}, request=request + ) + + revision_data["parents"] = [ + { + "id": parent_revision, + "url": reverse( + "api-1-revision", + 
url_args={"sha1_git": parent_revision}, + request=request, + ), + } + ] + + revision_data["children_urls"] = [ + reverse( + "api-1-revision", url_args={"sha1_git": child_revision}, request=request + ) ] assert actual_revision == revision_data @given(revision(), revision(), revision()) -def test_enrich_revision_with_no_message(api_request_factory, - archive_data, - revision, - parent_revision, - child_revision): +def test_enrich_revision_with_no_message( + api_request_factory, archive_data, revision, parent_revision, child_revision +): revision_data = archive_data.revision_get(revision) - revision_data['message'] = None - revision_data['parents'].append(parent_revision) - revision_data['children'] = child_revision + revision_data["message"] = None + revision_data["parents"].append(parent_revision) + revision_data["children"] = child_revision - url = reverse('api-1-revision', url_args={'sha1_git': revision}) + url = reverse("api-1-revision", url_args={"sha1_git": revision}) request = api_request_factory.get(url) actual_revision = utils.enrich_revision(revision_data, request) - revision_data['url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': revision}, - request=request) - - revision_data['directory_url'] = reverse( - 'api-1-directory', - url_args={'sha1_git': revision_data['directory']}, - request=request) - - revision_data['history_url'] = reverse( - 'api-1-revision-log', - url_args={'sha1_git': revision}, - request=request) - - revision_data['parents'] = [ - {'id': p['id'], 'url': reverse('api-1-revision', - url_args={'sha1_git': p['id']}, - request=request)} - for p in revision_data['parents'] + revision_data["url"] = reverse( + "api-1-revision", url_args={"sha1_git": revision}, request=request + ) + + revision_data["directory_url"] = reverse( + "api-1-directory", + url_args={"sha1_git": revision_data["directory"]}, + request=request, + ) + + revision_data["history_url"] = reverse( + "api-1-revision-log", url_args={"sha1_git": revision}, request=request + ) + + revision_data["parents"] = [ + { + "id": p["id"], + "url": reverse( + "api-1-revision", url_args={"sha1_git": p["id"]}, request=request + ), + } + for p in revision_data["parents"] ] - revision_data['children_urls'] = [ - reverse('api-1-revision', - url_args={'sha1_git': child_revision}, - request=request) + revision_data["children_urls"] = [ + reverse( + "api-1-revision", url_args={"sha1_git": child_revision}, request=request + ) ] assert actual_revision == revision_data @given(revision(), revision(), revision()) -def test_enrich_revision_with_invalid_message(api_request_factory, - archive_data, - revision, - parent_revision, - child_revision): +def test_enrich_revision_with_invalid_message( + api_request_factory, archive_data, revision, parent_revision, child_revision +): revision_data = archive_data.revision_get(revision) - revision_data['message'] = None - revision_data['message_decoding_failed'] = True, - revision_data['parents'].append(parent_revision) - revision_data['children'] = child_revision + revision_data["message"] = None + revision_data["message_decoding_failed"] = (True,) + revision_data["parents"].append(parent_revision) + revision_data["children"] = child_revision - url = reverse('api-1-revision', url_args={'sha1_git': revision}) + url = reverse("api-1-revision", url_args={"sha1_git": revision}) request = api_request_factory.get(url) actual_revision = utils.enrich_revision(revision_data, request) - revision_data['url'] = reverse( - 'api-1-revision', - url_args={'sha1_git': revision}, - request=request) 
- - revision_data['message_url'] = reverse( - 'api-1-revision-raw-message', - url_args={'sha1_git': revision}, - request=request) - - revision_data['directory_url'] = reverse( - 'api-1-directory', - url_args={'sha1_git': revision_data['directory']}, - request=request) - - revision_data['history_url'] = reverse( - 'api-1-revision-log', - url_args={'sha1_git': revision}, - request=request) - - revision_data['parents'] = [ - {'id': p['id'], 'url': reverse('api-1-revision', - url_args={'sha1_git': p['id']}, - request=request)} - for p in revision_data['parents'] + revision_data["url"] = reverse( + "api-1-revision", url_args={"sha1_git": revision}, request=request + ) + + revision_data["message_url"] = reverse( + "api-1-revision-raw-message", url_args={"sha1_git": revision}, request=request + ) + + revision_data["directory_url"] = reverse( + "api-1-directory", + url_args={"sha1_git": revision_data["directory"]}, + request=request, + ) + + revision_data["history_url"] = reverse( + "api-1-revision-log", url_args={"sha1_git": revision}, request=request + ) + + revision_data["parents"] = [ + { + "id": p["id"], + "url": reverse( + "api-1-revision", url_args={"sha1_git": p["id"]}, request=request + ), + } + for p in revision_data["parents"] ] - revision_data['children_urls'] = [ - reverse('api-1-revision', - url_args={'sha1_git': child_revision}, - request=request) + revision_data["children_urls"] = [ + reverse( + "api-1-revision", url_args={"sha1_git": child_revision}, request=request + ) ] assert actual_revision == revision_data @@ -528,42 +524,44 @@ def test_enrich_snapshot(api_request_factory, archive_data, snapshot): snapshot_data = archive_data.snapshot_get(snapshot) - url = reverse('api-1-snapshot', url_args={'snapshot_id': snapshot}) + url = reverse("api-1-snapshot", url_args={"snapshot_id": snapshot}) request = api_request_factory.get(url) actual_snapshot = utils.enrich_snapshot(snapshot_data, request) - for _, b in snapshot_data['branches'].items(): - if b['target_type'] in ('directory', 'revision', 'release'): - b['target_url'] = reverse(f'api-1-{b["target_type"]}', - url_args={'sha1_git': b['target']}, - request=request) - elif b['target_type'] == 'content': - b['target_url'] = reverse( - 'api-1-content', - url_args={'q': f'sha1_git:{b["target"]}'}, - request=request) - - for _, b in snapshot_data['branches'].items(): - if b['target_type'] == 'alias': + for _, b in snapshot_data["branches"].items(): + if b["target_type"] in ("directory", "revision", "release"): + b["target_url"] = reverse( + f'api-1-{b["target_type"]}', + url_args={"sha1_git": b["target"]}, + request=request, + ) + elif b["target_type"] == "content": + b["target_url"] = reverse( + "api-1-content", + url_args={"q": f'sha1_git:{b["target"]}'}, + request=request, + ) + + for _, b in snapshot_data["branches"].items(): + if b["target_type"] == "alias": target = resolve_branch_alias(snapshot_data, b) - b['target_url'] = target['target_url'] + b["target_url"] = target["target_url"] assert actual_snapshot == snapshot_data @given(origin()) def test_enrich_origin(api_request_factory, archive_data, origin): - url = reverse('api-1-origin', url_args={'origin_url': origin['url']}) + url = reverse("api-1-origin", url_args={"origin_url": origin["url"]}) request = api_request_factory.get(url) - origin_data = {'url': origin['url']} + origin_data = {"url": origin["url"]} actual_origin = utils.enrich_origin(origin_data, request) - origin_data['origin_visits_url'] = reverse( - 'api-1-origin-visits', - url_args={'origin_url': origin['url']}, - 
request=request) + origin_data["origin_visits_url"] = reverse( + "api-1-origin-visits", url_args={"origin_url": origin["url"]}, request=request + ) assert actual_origin == origin_data @@ -573,29 +571,33 @@ origin_visit = random.choice(get_origin_visits(origin)) - url = reverse('api-1-origin-visit', - url_args={'origin_url': origin['url'], - 'visit_id': origin_visit['visit']}) + url = reverse( + "api-1-origin-visit", + url_args={"origin_url": origin["url"], "visit_id": origin_visit["visit"]}, + ) request = api_request_factory.get(url) actual_origin_visit = utils.enrich_origin_visit( - origin_visit, with_origin_link=True, - with_origin_visit_link=True, request=request) - - origin_visit['origin_url'] = reverse( - 'api-1-origin', - url_args={'origin_url': origin['url']}, - request=request) - - origin_visit['origin_visit_url'] = reverse( - 'api-1-origin-visit', - url_args={'origin_url': origin['url'], - 'visit_id': origin_visit['visit']}, - request=request) - - origin_visit['snapshot_url'] = reverse( - 'api-1-snapshot', - url_args={'snapshot_id': origin_visit['snapshot']}, - request=request) + origin_visit, + with_origin_link=True, + with_origin_visit_link=True, + request=request, + ) + + origin_visit["origin_url"] = reverse( + "api-1-origin", url_args={"origin_url": origin["url"]}, request=request + ) + + origin_visit["origin_visit_url"] = reverse( + "api-1-origin-visit", + url_args={"origin_url": origin["url"], "visit_id": origin_visit["visit"]}, + request=request, + ) + + origin_visit["snapshot_url"] = reverse( + "api-1-snapshot", + url_args={"snapshot_id": origin_visit["snapshot"]}, + request=request, + ) assert actual_origin_visit == origin_visit diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py --- a/swh/web/tests/api/views/test_content.py +++ b/swh/web/tests/api/views/test_content.py @@ -15,373 +15,385 @@ @given(content()) def test_api_content_filetype(api_client, indexer_data, content): - indexer_data.content_add_mimetype(content['sha1']) - url = reverse('api-1-content-filetype', - url_args={'q': 'sha1_git:%s' % content['sha1_git']}) + indexer_data.content_add_mimetype(content["sha1"]) + url = reverse( + "api-1-content-filetype", url_args={"q": "sha1_git:%s" % content["sha1_git"]} + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}, - request=rv.wsgi_request) - expected_data = indexer_data.content_get_mimetype(content['sha1']) - expected_data['content_url'] = content_url + assert rv["Content-Type"] == "application/json" + content_url = reverse( + "api-1-content", + url_args={"q": "sha1:%s" % content["sha1"]}, + request=rv.wsgi_request, + ) + expected_data = indexer_data.content_get_mimetype(content["sha1"]) + expected_data["content_url"] = content_url assert rv.data == expected_data def test_api_content_filetype_sha_not_found(api_client): unknown_content_ = random_content() - url = reverse('api-1-content-filetype', - url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) + url = reverse( + "api-1-content-filetype", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} + ) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'No filetype information found for content ' - 'sha1:%s.' 
% unknown_content_['sha1'] + "exception": "NotFoundExc", + "reason": "No filetype information found for content " + "sha1:%s." % unknown_content_["sha1"], } @pytest.mark.skip # Language indexer is disabled @given(content()) def test_api_content_language(api_client, indexer_data, content): - indexer_data.content_add_language(content['sha1']) - url = reverse('api-1-content-language', - url_args={'q': 'sha1_git:%s' % content['sha1_git']}) + indexer_data.content_add_language(content["sha1"]) + url = reverse( + "api-1-content-language", url_args={"q": "sha1_git:%s" % content["sha1_git"]} + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}, - request=rv.wsgi_request) - expected_data = indexer_data.content_get_language(content['sha1']) - expected_data['content_url'] = content_url + assert rv["Content-Type"] == "application/json" + content_url = reverse( + "api-1-content", + url_args={"q": "sha1:%s" % content["sha1"]}, + request=rv.wsgi_request, + ) + expected_data = indexer_data.content_get_language(content["sha1"]) + expected_data["content_url"] = content_url assert rv.data == expected_data def test_api_content_language_sha_not_found(api_client): unknown_content_ = random_content() - url = reverse('api-1-content-language', - url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) + url = reverse( + "api-1-content-language", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} + ) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'No language information found for content ' - 'sha1:%s.' % unknown_content_['sha1'] + "exception": "NotFoundExc", + "reason": "No language information found for content " + "sha1:%s." 
% unknown_content_["sha1"], } @pytest.mark.skip # Language indexer is disabled -@pytest.mark.skipif(ctags_json_missing, - reason="requires ctags with json output support") +@pytest.mark.skipif( + ctags_json_missing, reason="requires ctags with json output support" +) @given(contents_with_ctags()) def test_api_content_symbol(api_client, indexer_data, contents_with_ctags): expected_data = {} - for content_sha1 in contents_with_ctags['sha1s']: + for content_sha1 in contents_with_ctags["sha1s"]: indexer_data.content_add_ctags(content_sha1) for ctag in indexer_data.content_get_ctags(content_sha1): - if ctag['name'] == contents_with_ctags['symbol_name']: + if ctag["name"] == contents_with_ctags["symbol_name"]: expected_data[content_sha1] = ctag break - url = reverse('api-1-content-symbol', - url_args={'q': contents_with_ctags['symbol_name']}, - query_params={'per_page': 100}) + url = reverse( + "api-1-content-symbol", + url_args={"q": contents_with_ctags["symbol_name"]}, + query_params={"per_page": 100}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" for entry in rv.data: - content_sha1 = entry['sha1'] + content_sha1 = entry["sha1"] expected_entry = expected_data[content_sha1] - for key, view_name in (('content_url', 'api-1-content'), - ('data_url', 'api-1-content-raw'), - ('license_url', 'api-1-content-license'), - ('language_url', 'api-1-content-language'), - ('filetype_url', 'api-1-content-filetype')): + for key, view_name in ( + ("content_url", "api-1-content"), + ("data_url", "api-1-content-raw"), + ("license_url", "api-1-content-license"), + ("language_url", "api-1-content-language"), + ("filetype_url", "api-1-content-filetype"), + ): expected_entry[key] = reverse( - view_name, url_args={'q': 'sha1:%s' % content_sha1}, - request=rv.wsgi_request) - expected_entry['sha1'] = content_sha1 - del expected_entry['id'] + view_name, + url_args={"q": "sha1:%s" % content_sha1}, + request=rv.wsgi_request, + ) + expected_entry["sha1"] = content_sha1 + del expected_entry["id"] assert entry == expected_entry - assert 'Link' not in rv + assert "Link" not in rv - url = reverse('api-1-content-symbol', - url_args={'q': contents_with_ctags['symbol_name']}, - query_params={'per_page': 2}) + url = reverse( + "api-1-content-symbol", + url_args={"q": contents_with_ctags["symbol_name"]}, + query_params={"per_page": 2}, + ) rv = api_client.get(url) - next_url = reverse('api-1-content-symbol', - url_args={'q': contents_with_ctags['symbol_name']}, - query_params={'last_sha1': rv.data[1]['sha1'], - 'per_page': 2}, - request=rv.wsgi_request), - assert rv['Link'] == '<%s>; rel="next"' % next_url + next_url = ( + reverse( + "api-1-content-symbol", + url_args={"q": contents_with_ctags["symbol_name"]}, + query_params={"last_sha1": rv.data[1]["sha1"], "per_page": 2}, + request=rv.wsgi_request, + ), + ) + assert rv["Link"] == '<%s>; rel="next"' % next_url def test_api_content_symbol_not_found(api_client): - url = reverse('api-1-content-symbol', url_args={'q': 'bar'}) + url = reverse("api-1-content-symbol", url_args={"q": "bar"}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'No indexed raw content match expression \'bar\'.' 
+ "exception": "NotFoundExc", + "reason": "No indexed raw content match expression 'bar'.", } - assert 'Link' not in rv + assert "Link" not in rv -@pytest.mark.skipif(ctags_json_missing, - reason="requires ctags with json output support") +@pytest.mark.skipif( + ctags_json_missing, reason="requires ctags with json output support" +) @given(content()) def test_api_content_ctags(api_client, indexer_data, content): - indexer_data.content_add_ctags(content['sha1']) - url = reverse('api-1-content-ctags', - url_args={'q': 'sha1_git:%s' % content['sha1_git']}) + indexer_data.content_add_ctags(content["sha1"]) + url = reverse( + "api-1-content-ctags", url_args={"q": "sha1_git:%s" % content["sha1_git"]} + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}, - request=rv.wsgi_request) - expected_data = list(indexer_data.content_get_ctags(content['sha1'])) + assert rv["Content-Type"] == "application/json" + content_url = reverse( + "api-1-content", + url_args={"q": "sha1:%s" % content["sha1"]}, + request=rv.wsgi_request, + ) + expected_data = list(indexer_data.content_get_ctags(content["sha1"])) for e in expected_data: - e['content_url'] = content_url + e["content_url"] = content_url assert rv.data == expected_data -@pytest.mark.skipif(fossology_missing, - reason="requires fossology-nomossa installed") +@pytest.mark.skipif(fossology_missing, reason="requires fossology-nomossa installed") @given(content()) def test_api_content_license(api_client, indexer_data, content): - indexer_data.content_add_license(content['sha1']) - url = reverse('api-1-content-license', - url_args={'q': 'sha1_git:%s' % content['sha1_git']}) + indexer_data.content_add_license(content["sha1"]) + url = reverse( + "api-1-content-license", url_args={"q": "sha1_git:%s" % content["sha1_git"]} + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - content_url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % content['sha1']}, - request=rv.wsgi_request) - expected_data = indexer_data.content_get_license(content['sha1']) - expected_data['content_url'] = content_url + assert rv["Content-Type"] == "application/json" + content_url = reverse( + "api-1-content", + url_args={"q": "sha1:%s" % content["sha1"]}, + request=rv.wsgi_request, + ) + expected_data = indexer_data.content_get_license(content["sha1"]) + expected_data["content_url"] = content_url assert rv.data == expected_data def test_api_content_license_sha_not_found(api_client): unknown_content_ = random_content() - url = reverse('api-1-content-license', - url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) + url = reverse( + "api-1-content-license", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} + ) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'No license information found for content ' - 'sha1:%s.' % unknown_content_['sha1'] + "exception": "NotFoundExc", + "reason": "No license information found for content " + "sha1:%s." 
% unknown_content_["sha1"], } @given(content()) def test_api_content_metadata(api_client, archive_data, content): - url = reverse('api-1-content', {'q': 'sha1:%s' % content['sha1']}) + url = reverse("api-1-content", {"q": "sha1:%s" % content["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - expected_data = archive_data.content_get_metadata(content['sha1']) - for key, view_name in (('data_url', 'api-1-content-raw'), - ('license_url', 'api-1-content-license'), - ('language_url', 'api-1-content-language'), - ('filetype_url', 'api-1-content-filetype')): + assert rv["Content-Type"] == "application/json" + expected_data = archive_data.content_get_metadata(content["sha1"]) + for key, view_name in ( + ("data_url", "api-1-content-raw"), + ("license_url", "api-1-content-license"), + ("language_url", "api-1-content-language"), + ("filetype_url", "api-1-content-filetype"), + ): expected_data[key] = reverse( - view_name, url_args={'q': 'sha1:%s' % content['sha1']}, - request=rv.wsgi_request) + view_name, + url_args={"q": "sha1:%s" % content["sha1"]}, + request=rv.wsgi_request, + ) assert rv.data == expected_data def test_api_content_not_found_as_json(api_client): unknown_content_ = random_content() - url = reverse('api-1-content', - url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) + url = reverse("api-1-content", url_args={"q": "sha1:%s" % unknown_content_["sha1"]}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Content with sha1 checksum equals to %s not found!' - % unknown_content_['sha1'] + "exception": "NotFoundExc", + "reason": "Content with sha1 checksum equals to %s not found!" + % unknown_content_["sha1"], } def test_api_content_not_found_as_yaml(api_client): unknown_content_ = random_content() - url = reverse('api-1-content', - url_args={'q': 'sha256:%s' % unknown_content_['sha256']}) - rv = api_client.get(url, HTTP_ACCEPT='application/yaml') + url = reverse( + "api-1-content", url_args={"q": "sha256:%s" % unknown_content_["sha256"]} + ) + rv = api_client.get(url, HTTP_ACCEPT="application/yaml") assert rv.status_code == 404, rv.data - assert 'application/yaml' in rv['Content-Type'] + assert "application/yaml" in rv["Content-Type"] assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Content with sha256 checksum equals to %s not found!' % - unknown_content_['sha256'] + "exception": "NotFoundExc", + "reason": "Content with sha256 checksum equals to %s not found!" + % unknown_content_["sha256"], } def test_api_content_raw_ko_not_found(api_client): unknown_content_ = random_content() - url = reverse('api-1-content-raw', - url_args={'q': 'sha1:%s' % unknown_content_['sha1']}) + url = reverse( + "api-1-content-raw", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} + ) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Content with sha1 checksum equals to %s not found!' % - unknown_content_['sha1'] + "exception": "NotFoundExc", + "reason": "Content with sha1 checksum equals to %s not found!" 
+ % unknown_content_["sha1"], } @given(content()) def test_api_content_raw_text(api_client, archive_data, content): - url = reverse('api-1-content-raw', - url_args={'q': 'sha1:%s' % content['sha1']}) + url = reverse("api-1-content-raw", url_args={"q": "sha1:%s" % content["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/octet-stream' - assert rv['Content-disposition'] == \ - 'attachment; filename=content_sha1_%s_raw' % content['sha1'] - assert rv['Content-Type'] == 'application/octet-stream' - expected_data = archive_data.content_get(content['sha1']) - assert rv.content == expected_data['data'] + assert rv["Content-Type"] == "application/octet-stream" + assert ( + rv["Content-disposition"] + == "attachment; filename=content_sha1_%s_raw" % content["sha1"] + ) + assert rv["Content-Type"] == "application/octet-stream" + expected_data = archive_data.content_get(content["sha1"]) + assert rv.content == expected_data["data"] @given(content()) def test_api_content_raw_text_with_filename(api_client, archive_data, content): - url = reverse('api-1-content-raw', - url_args={'q': 'sha1:%s' % content['sha1']}, - query_params={'filename': 'filename.txt'}) + url = reverse( + "api-1-content-raw", + url_args={"q": "sha1:%s" % content["sha1"]}, + query_params={"filename": "filename.txt"}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/octet-stream' - assert rv['Content-disposition'] == \ - 'attachment; filename=filename.txt' - assert rv['Content-Type'] == 'application/octet-stream' - expected_data = archive_data.content_get(content['sha1']) - assert rv.content == expected_data['data'] + assert rv["Content-Type"] == "application/octet-stream" + assert rv["Content-disposition"] == "attachment; filename=filename.txt" + assert rv["Content-Type"] == "application/octet-stream" + expected_data = archive_data.content_get(content["sha1"]) + assert rv.content == expected_data["data"] @given(content()) def test_api_check_content_known(api_client, content): - url = reverse('api-1-content-known', - url_args={'q': content['sha1']}) + url = reverse("api-1-content-known", url_args={"q": content["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'search_res': [ - { - 'found': True, - 'sha1': content['sha1'] - } - ], - 'search_stats': {'nbfiles': 1, 'pct': 100.0} + "search_res": [{"found": True, "sha1": content["sha1"]}], + "search_stats": {"nbfiles": 1, "pct": 100.0}, } @given(content()) def test_api_check_content_known_as_yaml(api_client, content): - url = reverse('api-1-content-known', - url_args={'q': content['sha1']}) - rv = api_client.get(url, HTTP_ACCEPT='application/yaml') + url = reverse("api-1-content-known", url_args={"q": content["sha1"]}) + rv = api_client.get(url, HTTP_ACCEPT="application/yaml") assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/yaml' + assert rv["Content-Type"] == "application/yaml" assert rv.data == { - 'search_res': [ - { - 'found': True, - 'sha1': content['sha1'] - } - ], - 'search_stats': {'nbfiles': 1, 'pct': 100.0} + "search_res": [{"found": True, "sha1": content["sha1"]}], + "search_stats": {"nbfiles": 1, "pct": 100.0}, } @given(content()) def test_api_check_content_known_post_as_yaml(api_client, content): - url = reverse('api-1-content-known') - rv = api_client.post(url, data={'q': 
content['sha1']}, - HTTP_ACCEPT='application/yaml') + url = reverse("api-1-content-known") + rv = api_client.post( + url, data={"q": content["sha1"]}, HTTP_ACCEPT="application/yaml" + ) assert rv.status_code == 200, rv.data - assert 'application/yaml' in rv['Content-Type'] + assert "application/yaml" in rv["Content-Type"] assert rv.data == { - 'search_res': [ - { - 'found': True, - 'sha1': content['sha1'] - } - ], - 'search_stats': {'nbfiles': 1, 'pct': 100.0} + "search_res": [{"found": True, "sha1": content["sha1"]}], + "search_stats": {"nbfiles": 1, "pct": 100.0}, } def test_api_check_content_known_not_found(api_client): unknown_content_ = random_content() - url = reverse('api-1-content-known', - url_args={'q': unknown_content_['sha1']}) + url = reverse("api-1-content-known", url_args={"q": unknown_content_["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'search_res': [ - { - 'found': False, - 'sha1': unknown_content_['sha1'] - } - ], - 'search_stats': {'nbfiles': 1, 'pct': 0.0} + "search_res": [{"found": False, "sha1": unknown_content_["sha1"]}], + "search_stats": {"nbfiles": 1, "pct": 0.0}, } @given(content()) def test_api_content_uppercase(api_client, content): - url = reverse('api-1-content-uppercase-checksum', - url_args={'q': content['sha1'].upper()}) + url = reverse( + "api-1-content-uppercase-checksum", url_args={"q": content["sha1"].upper()} + ) rv = api_client.get(url) assert rv.status_code == 302, rv.data - redirect_url = reverse('api-1-content', - url_args={'q': content['sha1']}) + redirect_url = reverse("api-1-content", url_args={"q": content["sha1"]}) - assert rv['location'] == redirect_url + assert rv["location"] == redirect_url diff --git a/swh/web/tests/api/views/test_directory.py b/swh/web/tests/api/views/test_directory.py --- a/swh/web/tests/api/views/test_directory.py +++ b/swh/web/tests/api/views/test_directory.py @@ -16,16 +16,16 @@ @given(directory()) def test_api_directory(api_client, archive_data, directory): - url = reverse('api-1-directory', url_args={'sha1_git': directory}) + url = reverse("api-1-directory", url_args={"sha1_git": directory}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" dir_content = list(archive_data.directory_ls(directory)) - expected_data = list(map(enrich_directory, - dir_content, - [rv.wsgi_request] * len(dir_content))) + expected_data = list( + map(enrich_directory, dir_content, [rv.wsgi_request] * len(dir_content)) + ) assert rv.data == expected_data @@ -33,15 +33,14 @@ def test_api_directory_not_found(api_client): unknown_directory_ = random_sha1() - url = reverse('api-1-directory', - url_args={'sha1_git': unknown_directory_}) + url = reverse("api-1-directory", url_args={"sha1_git": unknown_directory_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Directory with sha1_git %s not found' % unknown_directory_ + "exception": "NotFoundExc", + "reason": "Directory with sha1_git %s not found" % unknown_directory_, } @@ -51,40 +50,42 @@ directory_content = archive_data.directory_ls(directory) path = random.choice(directory_content) - url = reverse('api-1-directory', - url_args={'sha1_git': directory, 'path': 
path['name']}) + url = reverse( + "api-1-directory", url_args={"sha1_git": directory, "path": path["name"]} + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == enrich_directory(path, rv.wsgi_request) @given(directory()) def test_api_directory_with_path_not_found(api_client, directory): - path = 'some/path/to/nonexistent/dir/' - url = reverse('api-1-directory', - url_args={'sha1_git': directory, 'path': path}) + path = "some/path/to/nonexistent/dir/" + url = reverse("api-1-directory", url_args={"sha1_git": directory, "path": path}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': ('Directory entry with path %s from %s not found' % - (path, directory)) + "exception": "NotFoundExc", + "reason": ( + "Directory entry with path %s from %s not found" % (path, directory) + ), } @given(directory()) def test_api_directory_uppercase(api_client, directory): - url = reverse('api-1-directory-uppercase-checksum', - url_args={'sha1_git': directory.upper()}) + url = reverse( + "api-1-directory-uppercase-checksum", url_args={"sha1_git": directory.upper()} + ) resp = api_client.get(url) assert resp.status_code == 302 - redirect_url = reverse('api-1-directory', url_args={'sha1_git': directory}) + redirect_url = reverse("api-1-directory", url_args={"sha1_git": directory}) - assert resp['location'] == redirect_url + assert resp["location"] == redirect_url diff --git a/swh/web/tests/api/views/test_identifiers.py b/swh/web/tests/api/views/test_identifiers.py --- a/swh/web/tests/api/views/test_identifiers.py +++ b/swh/web/tests/api/views/test_identifiers.py @@ -5,55 +5,64 @@ from hypothesis import given -from swh.model.identifiers import ( - CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT -) +from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT from swh.web.common.utils import reverse from swh.web.tests.data import random_sha1 from swh.web.tests.strategies import ( - content, directory, origin, release, revision, snapshot, - unknown_content, unknown_directory, unknown_release, - unknown_revision, unknown_snapshot + content, + directory, + origin, + release, + revision, + snapshot, + unknown_content, + unknown_directory, + unknown_release, + unknown_revision, + unknown_snapshot, ) @given(origin(), content(), directory(), release(), revision(), snapshot()) -def test_swh_id_resolve_success(api_client, origin, content, directory, - release, revision, snapshot): +def test_swh_id_resolve_success( + api_client, origin, content, directory, release, revision, snapshot +): for obj_type_short, obj_type, obj_id in ( - ('cnt', CONTENT, content['sha1_git']), - ('dir', DIRECTORY, directory), - ('rel', RELEASE, release), - ('rev', REVISION, revision), - ('snp', SNAPSHOT, snapshot)): + ("cnt", CONTENT, content["sha1_git"]), + ("dir", DIRECTORY, directory), + ("rel", RELEASE, release), + ("rev", REVISION, revision), + ("snp", SNAPSHOT, snapshot), + ): - swh_id = 'swh:1:%s:%s;origin=%s' % (obj_type_short, obj_id, - origin['url']) - url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id}) + swh_id = "swh:1:%s:%s;origin=%s" % (obj_type_short, obj_id, origin["url"]) + url = reverse("api-1-resolve-swh-pid", url_args={"swh_id": swh_id}) resp = api_client.get(url) if obj_type == CONTENT: - 
url_args = {'query_string': 'sha1_git:%s' % obj_id} + url_args = {"query_string": "sha1_git:%s" % obj_id} elif obj_type == SNAPSHOT: - url_args = {'snapshot_id': obj_id} + url_args = {"snapshot_id": obj_id} else: - url_args = {'sha1_git': obj_id} + url_args = {"sha1_git": obj_id} - browse_rev_url = reverse('browse-%s' % obj_type, - url_args=url_args, - query_params={'origin': origin['url']}, - request=resp.wsgi_request) + browse_rev_url = reverse( + "browse-%s" % obj_type, + url_args=url_args, + query_params={"origin": origin["url"]}, + request=resp.wsgi_request, + ) expected_result = { - 'browse_url': browse_rev_url, - 'metadata': {'origin': origin['url']}, - 'namespace': 'swh', - 'object_id': obj_id, - 'object_type': obj_type, - 'scheme_version': 1 + "browse_url": browse_rev_url, + "metadata": {"origin": origin["url"]}, + "namespace": "swh", + "object_id": obj_id, + "object_type": obj_type, + "scheme_version": 1, } assert resp.status_code == 200, resp.data @@ -61,30 +70,42 @@ def test_swh_id_resolve_invalid(api_client): - rev_id_invalid = '96db9023b8_foo_50d6c108e9a3' - swh_id = 'swh:1:rev:%s' % rev_id_invalid - url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id}) + rev_id_invalid = "96db9023b8_foo_50d6c108e9a3" + swh_id = "swh:1:rev:%s" % rev_id_invalid + url = reverse("api-1-resolve-swh-pid", url_args={"swh_id": swh_id}) resp = api_client.get(url) assert resp.status_code == 400, resp.data -@given(unknown_content(), unknown_directory(), unknown_release(), - unknown_revision(), unknown_snapshot()) -def test_swh_id_resolve_not_found(api_client, unknown_content, - unknown_directory, unknown_release, - unknown_revision, unknown_snapshot): - - for obj_type_short, obj_id in (('cnt', unknown_content['sha1_git']), - ('dir', unknown_directory), - ('rel', unknown_release), - ('rev', unknown_revision), - ('snp', unknown_snapshot)): - - swh_id = 'swh:1:%s:%s' % (obj_type_short, obj_id) - - url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': swh_id}) +@given( + unknown_content(), + unknown_directory(), + unknown_release(), + unknown_revision(), + unknown_snapshot(), +) +def test_swh_id_resolve_not_found( + api_client, + unknown_content, + unknown_directory, + unknown_release, + unknown_revision, + unknown_snapshot, +): + + for obj_type_short, obj_id in ( + ("cnt", unknown_content["sha1_git"]), + ("dir", unknown_directory), + ("rel", unknown_release), + ("rev", unknown_revision), + ("snp", unknown_snapshot), + ): + + swh_id = "swh:1:%s:%s" % (obj_type_short, obj_id) + + url = reverse("api-1-resolve-swh-pid", url_args={"swh_id": swh_id}) resp = api_client.get(url) @@ -92,70 +113,76 @@ def test_swh_origin_id_not_resolvable(api_client): - ori_pid = 'swh:1:ori:8068d0075010b590762c6cb5682ed53cb3c13deb' - url = reverse('api-1-resolve-swh-pid', url_args={'swh_id': ori_pid}) + ori_pid = "swh:1:ori:8068d0075010b590762c6cb5682ed53cb3c13deb" + url = reverse("api-1-resolve-swh-pid", url_args={"swh_id": ori_pid}) resp = api_client.get(url) assert resp.status_code == 400, resp.data @given(content(), directory()) def test_api_known_swhpid_some_present(api_client, content, directory): - content_ = 'swh:1:cnt:%s' % content['sha1_git'] - directory_ = 'swh:1:dir:%s' % directory - unknown_revision_ = 'swh:1:rev:%s' % random_sha1() - unknown_release_ = 'swh:1:rel:%s' % random_sha1() - unknown_snapshot_ = 'swh:1:snp:%s' % random_sha1() - - input_pids = [content_, directory_, unknown_revision_, - unknown_release_, unknown_snapshot_] - - url = reverse('api-1-known') - - resp = api_client.post(url, 
data=input_pids, format='json', - HTTP_ACCEPT='application/json') + content_ = "swh:1:cnt:%s" % content["sha1_git"] + directory_ = "swh:1:dir:%s" % directory + unknown_revision_ = "swh:1:rev:%s" % random_sha1() + unknown_release_ = "swh:1:rel:%s" % random_sha1() + unknown_snapshot_ = "swh:1:snp:%s" % random_sha1() + + input_pids = [ + content_, + directory_, + unknown_revision_, + unknown_release_, + unknown_snapshot_, + ] + + url = reverse("api-1-known") + + resp = api_client.post( + url, data=input_pids, format="json", HTTP_ACCEPT="application/json" + ) assert resp.status_code == 200, resp.data - assert resp['Content-Type'] == 'application/json' + assert resp["Content-Type"] == "application/json" assert resp.data == { - content_: {'known': True}, - directory_: {'known': True}, - unknown_revision_: {'known': False}, - unknown_release_: {'known': False}, - unknown_snapshot_: {'known': False} - } + content_: {"known": True}, + directory_: {"known": True}, + unknown_revision_: {"known": False}, + unknown_release_: {"known": False}, + unknown_snapshot_: {"known": False}, + } def test_api_known_invalid_swhpid(api_client): - invalid_pid_sha1 = ['swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13de;'] - invalid_pid_type = ['swh:1:cnn:8068d0075010b590762c6cb5682ed53cb3c13deb'] + invalid_pid_sha1 = ["swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13de;"] + invalid_pid_type = ["swh:1:cnn:8068d0075010b590762c6cb5682ed53cb3c13deb"] - url = reverse('api-1-known') + url = reverse("api-1-known") - resp = api_client.post(url, data=invalid_pid_sha1, format='json', - HTTP_ACCEPT='application/json') + resp = api_client.post( + url, data=invalid_pid_sha1, format="json", HTTP_ACCEPT="application/json" + ) assert resp.status_code == 400, resp.data - resp2 = api_client.post(url, data=invalid_pid_type, format='json', - HTTP_ACCEPT='application/json') + resp2 = api_client.post( + url, data=invalid_pid_type, format="json", HTTP_ACCEPT="application/json" + ) assert resp2.status_code == 400, resp.data def test_api_known_raises_large_payload_error(api_client): - random_pid = 'swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13deb' + random_pid = "swh:1:cnt:8068d0075010b590762c6cb5682ed53cb3c13deb" limit = 10000 - err_msg = 'The maximum number of PIDs this endpoint can receive is 1000' + err_msg = "The maximum number of PIDs this endpoint can receive is 1000" pids = [random_pid for i in range(limit)] - url = reverse('api-1-known') - resp = api_client.post(url, data=pids, format='json', - HTTP_ACCEPT='application/json') + url = reverse("api-1-known") + resp = api_client.post( + url, data=pids, format="json", HTTP_ACCEPT="application/json" + ) assert resp.status_code == 413, resp.data - assert resp['Content-Type'] == 'application/json' - assert resp.data == { - 'exception': 'LargePayloadExc', - 'reason': err_msg - } + assert resp["Content-Type"] == "application/json" + assert resp.data == {"exception": "LargePayloadExc", "reason": err_msg} diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -15,9 +15,7 @@ from swh.web.common.exc import BadInputExc from swh.web.common.utils import reverse from swh.web.common.origin_visits import get_origin_visits -from swh.web.tests.strategies import ( - origin, new_origin, visit_dates, new_snapshots -) +from swh.web.tests.strategies import origin, new_origin, visit_dates, new_snapshots def _scroll_results(api_client, url): @@ -27,15 +25,15 @@ while True: rv = 
api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" results.extend(rv.data) - if 'Link' in rv: - for link in parse_header_links(rv['Link']): - if link['rel'] == 'next': + if "Link" in rv: + for link in parse_header_links(rv["Link"]): + if link["rel"] == "next": # Found link to next page of results - url = link['url'] + url = link["url"] break else: # No link with 'rel=next' @@ -48,254 +46,269 @@ def test_api_lookup_origin_visits_raise_error(api_client, mocker): - mock_get_origin_visits = mocker.patch( - 'swh.web.api.views.origin.get_origin_visits') - err_msg = 'voluntary error to check the bad request middleware.' + mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") + err_msg = "voluntary error to check the bad request middleware." mock_get_origin_visits.side_effect = BadInputExc(err_msg) - url = reverse('api-1-origin-visits', url_args={'origin_url': 'http://foo'}) + url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = api_client.get(url) assert rv.status_code == 400, rv.data - assert rv['Content-Type'] == 'application/json' - assert rv.data == { - 'exception': 'BadInputExc', - 'reason': err_msg - } + assert rv["Content-Type"] == "application/json" + assert rv.data == {"exception": "BadInputExc", "reason": err_msg} -def test_api_lookup_origin_visits_raise_swh_storage_error_db(api_client, - mocker): - mock_get_origin_visits = mocker.patch( - 'swh.web.api.views.origin.get_origin_visits') - err_msg = 'Storage exploded! Will be back online shortly!' +def test_api_lookup_origin_visits_raise_swh_storage_error_db(api_client, mocker): + mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") + err_msg = "Storage exploded! Will be back online shortly!" mock_get_origin_visits.side_effect = StorageDBError(err_msg) - url = reverse('api-1-origin-visits', url_args={'origin_url': 'http://foo'}) + url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = api_client.get(url) assert rv.status_code == 503, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'StorageDBError', - 'reason': - 'An unexpected error occurred in the backend: %s' % err_msg + "exception": "StorageDBError", + "reason": "An unexpected error occurred in the backend: %s" % err_msg, } -def test_api_lookup_origin_visits_raise_swh_storage_error_api(api_client, - mocker): - mock_get_origin_visits = mocker.patch( - 'swh.web.api.views.origin.get_origin_visits') - err_msg = 'Storage API dropped dead! Will resurrect asap!' +def test_api_lookup_origin_visits_raise_swh_storage_error_api(api_client, mocker): + mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") + err_msg = "Storage API dropped dead! Will resurrect asap!" 
mock_get_origin_visits.side_effect = StorageAPIError(err_msg) - url = reverse( - 'api-1-origin-visits', url_args={'origin_url': 'http://foo'}) + url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = api_client.get(url) assert rv.status_code == 503, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'StorageAPIError', - 'reason': - 'An unexpected error occurred in the api backend: %s' % err_msg + "exception": "StorageAPIError", + "reason": "An unexpected error occurred in the api backend: %s" % err_msg, } @given(new_origin(), visit_dates(3), new_snapshots(3)) -def test_api_lookup_origin_visits(api_client, archive_data, new_origin, - visit_dates, new_snapshots): +def test_api_lookup_origin_visits( + api_client, archive_data, new_origin, visit_dates, new_snapshots +): archive_data.origin_add_one(new_origin) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( - new_origin.url, visit_date, type='git') + new_origin.url, visit_date, type="git" + ) archive_data.snapshot_add([new_snapshots[i]]) archive_data.origin_visit_update( - new_origin.url, origin_visit.visit, - status='full', - snapshot=new_snapshots[i].id) + new_origin.url, + origin_visit.visit, + status="full", + snapshot=new_snapshots[i].id, + ) all_visits = list(reversed(get_origin_visits(new_origin.to_dict()))) for last_visit, expected_visits in ( - (None, all_visits[:2]), - (all_visits[1]['visit'], all_visits[2:])): + (None, all_visits[:2]), + (all_visits[1]["visit"], all_visits[2:]), + ): - url = reverse('api-1-origin-visits', - url_args={'origin_url': new_origin.url}, - query_params={'per_page': 2, - 'last_visit': last_visit}) + url = reverse( + "api-1-origin-visits", + url_args={"origin_url": new_origin.url}, + query_params={"per_page": 2, "last_visit": last_visit}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" for i in range(len(expected_visits)): expected_visits[i] = enrich_origin_visit( - expected_visits[i], with_origin_link=False, - with_origin_visit_link=True, request=rv.wsgi_request) + expected_visits[i], + with_origin_link=False, + with_origin_visit_link=True, + request=rv.wsgi_request, + ) assert rv.data == expected_visits @given(new_origin(), visit_dates(3), new_snapshots(3)) -def test_api_lookup_origin_visits_by_id(api_client, archive_data, new_origin, - visit_dates, new_snapshots): +def test_api_lookup_origin_visits_by_id( + api_client, archive_data, new_origin, visit_dates, new_snapshots +): archive_data.origin_add_one(new_origin) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( - new_origin.url, visit_date, type='git') + new_origin.url, visit_date, type="git" + ) archive_data.snapshot_add([new_snapshots[i]]) archive_data.origin_visit_update( - new_origin.url, origin_visit.visit, - status='full', - snapshot=new_snapshots[i].id) + new_origin.url, + origin_visit.visit, + status="full", + snapshot=new_snapshots[i].id, + ) all_visits = list(reversed(get_origin_visits(new_origin.to_dict()))) for last_visit, expected_visits in ( - (None, all_visits[:2]), - (all_visits[1]['visit'], all_visits[2:4])): + (None, all_visits[:2]), + (all_visits[1]["visit"], all_visits[2:4]), + ): - url = reverse('api-1-origin-visits', - url_args={'origin_url': new_origin.url}, - query_params={'per_page': 2, - 'last_visit': last_visit}) 
+ url = reverse( + "api-1-origin-visits", + url_args={"origin_url": new_origin.url}, + query_params={"per_page": 2, "last_visit": last_visit}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" for i in range(len(expected_visits)): expected_visits[i] = enrich_origin_visit( - expected_visits[i], with_origin_link=False, - with_origin_visit_link=True, request=rv.wsgi_request) + expected_visits[i], + with_origin_link=False, + with_origin_visit_link=True, + request=rv.wsgi_request, + ) assert rv.data == expected_visits @given(new_origin(), visit_dates(3), new_snapshots(3)) -def test_api_lookup_origin_visit(api_client, archive_data, new_origin, - visit_dates, new_snapshots): +def test_api_lookup_origin_visit( + api_client, archive_data, new_origin, visit_dates, new_snapshots +): archive_data.origin_add_one(new_origin) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( - new_origin.url, visit_date, type='git') + new_origin.url, visit_date, type="git" + ) visit_id = origin_visit.visit archive_data.snapshot_add([new_snapshots[i]]) archive_data.origin_visit_update( - new_origin.url, visit_id, - status='full', - snapshot=new_snapshots[i].id) - url = reverse('api-1-origin-visit', - url_args={'origin_url': new_origin.url, - 'visit_id': visit_id}) + new_origin.url, visit_id, status="full", snapshot=new_snapshots[i].id + ) + url = reverse( + "api-1-origin-visit", + url_args={"origin_url": new_origin.url, "visit_id": visit_id}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" - expected_visit = archive_data.origin_visit_get_by( - new_origin.url, visit_id) + expected_visit = archive_data.origin_visit_get_by(new_origin.url, visit_id) expected_visit = enrich_origin_visit( - expected_visit, with_origin_link=True, - with_origin_visit_link=False, request=rv.wsgi_request) + expected_visit, + with_origin_link=True, + with_origin_visit_link=False, + request=rv.wsgi_request, + ) assert rv.data == expected_visit @given(new_origin()) -def test_api_lookup_origin_visit_latest_no_visit(api_client, archive_data, - new_origin): +def test_api_lookup_origin_visit_latest_no_visit(api_client, archive_data, new_origin): archive_data.origin_add_one(new_origin) - url = reverse('api-1-origin-visit-latest', - url_args={'origin_url': new_origin.url}) + url = reverse("api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'No visit for origin %s found' % new_origin.url + "exception": "NotFoundExc", + "reason": "No visit for origin %s found" % new_origin.url, } @given(new_origin(), visit_dates(2), new_snapshots(1)) -def test_api_lookup_origin_visit_latest(api_client, archive_data, new_origin, - visit_dates, new_snapshots): +def test_api_lookup_origin_visit_latest( + api_client, archive_data, new_origin, visit_dates, new_snapshots +): archive_data.origin_add_one(new_origin) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( - new_origin.url, visit_date, type='git') + new_origin.url, visit_date, type="git" + ) visit_ids.append(origin_visit.visit) archive_data.snapshot_add([new_snapshots[0]]) archive_data.origin_visit_update( - new_origin.url, visit_ids[0], - 
status='full', - snapshot=new_snapshots[0].id) + new_origin.url, visit_ids[0], status="full", snapshot=new_snapshots[0].id + ) - url = reverse('api-1-origin-visit-latest', - url_args={'origin_url': new_origin.url}) + url = reverse("api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" - expected_visit = archive_data.origin_visit_get_by( - new_origin.url, visit_ids[1]) + expected_visit = archive_data.origin_visit_get_by(new_origin.url, visit_ids[1]) expected_visit = enrich_origin_visit( - expected_visit, with_origin_link=True, - with_origin_visit_link=False, request=rv.wsgi_request) + expected_visit, + with_origin_link=True, + with_origin_visit_link=False, + request=rv.wsgi_request, + ) assert rv.data == expected_visit @given(new_origin(), visit_dates(2), new_snapshots(1)) -def test_api_lookup_origin_visit_latest_with_snapshot(api_client, archive_data, - new_origin, visit_dates, - new_snapshots): +def test_api_lookup_origin_visit_latest_with_snapshot( + api_client, archive_data, new_origin, visit_dates, new_snapshots +): archive_data.origin_add_one(new_origin) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( - new_origin.url, visit_date, type='git') + new_origin.url, visit_date, type="git" + ) visit_ids.append(origin_visit.visit) archive_data.snapshot_add([new_snapshots[0]]) archive_data.origin_visit_update( - new_origin.url, visit_ids[0], - status='full', - snapshot=new_snapshots[0].id) + new_origin.url, visit_ids[0], status="full", snapshot=new_snapshots[0].id + ) - url = reverse('api-1-origin-visit-latest', - url_args={'origin_url': new_origin.url}, - query_params={'require_snapshot': True}) + url = reverse( + "api-1-origin-visit-latest", + url_args={"origin_url": new_origin.url}, + query_params={"require_snapshot": True}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" - expected_visit = archive_data.origin_visit_get_by( - new_origin.url, visit_ids[0]) + expected_visit = archive_data.origin_visit_get_by(new_origin.url, visit_ids[0]) expected_visit = enrich_origin_visit( - expected_visit, with_origin_link=True, - with_origin_visit_link=False, request=rv.wsgi_request) + expected_visit, + with_origin_link=True, + with_origin_visit_link=False, + request=rv.wsgi_request, + ) assert rv.data == expected_visit @@ -305,73 +318,69 @@ all_visits = list(reversed(get_origin_visits(origin))) - max_visit_id = max([v['visit'] for v in all_visits]) + max_visit_id = max([v["visit"] for v in all_visits]) - url = reverse('api-1-origin-visit', - url_args={'origin_url': origin['url'], - 'visit_id': max_visit_id + 1}) + url = reverse( + "api-1-origin-visit", + url_args={"origin_url": origin["url"], "visit_id": max_visit_id + 1}, + ) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Origin %s or its visit with id %s not found!' % - (origin['url'], max_visit_id+1) + "exception": "NotFoundExc", + "reason": "Origin %s or its visit with id %s not found!" 
+ % (origin["url"], max_visit_id + 1), } def test_api_origins(api_client, archive_data): origins = list(archive_data.origin_get_range(0, 10000)) - origin_urls = {origin['url'] for origin in origins} + origin_urls = {origin["url"] for origin in origins} # Get only one - url = reverse('api-1-origins', - query_params={'origin_count': 1}) + url = reverse("api-1-origins", query_params={"origin_count": 1}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1 - assert {origin['url'] for origin in rv.data} <= origin_urls + assert {origin["url"] for origin in rv.data} <= origin_urls # Get all - url = reverse('api-1-origins', - query_params={'origin_count': len(origins)}) + url = reverse("api-1-origins", query_params={"origin_count": len(origins)}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == len(origins) - assert {origin['url'] for origin in rv.data} == origin_urls + assert {origin["url"] for origin in rv.data} == origin_urls # Get "all + 10" - url = reverse('api-1-origins', - query_params={'origin_count': len(origins)+10}) + url = reverse("api-1-origins", query_params={"origin_count": len(origins) + 10}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == len(origins) - assert {origin['url'] for origin in rv.data} == origin_urls + assert {origin["url"] for origin in rv.data} == origin_urls -@pytest.mark.parametrize('origin_count', [1, 2, 10, 100]) +@pytest.mark.parametrize("origin_count", [1, 2, 10, 100]) def test_api_origins_scroll(api_client, archive_data, origin_count): origins = list(archive_data.origin_get_range(0, 10000)) - origin_urls = {origin['url'] for origin in origins} + origin_urls = {origin["url"] for origin in origins} - url = reverse('api-1-origins', - query_params={'origin_count': origin_count}) + url = reverse("api-1-origins", query_params={"origin_count": origin_count}) results = _scroll_results(api_client, url) assert len(results) == len(origins) - assert {origin['url'] for origin in results} == origin_urls + assert {origin["url"] for origin in results} == origin_urls @given(origin()) def test_api_origin_by_url(api_client, archive_data, origin): - url = reverse('api-1-origin', - url_args={'origin_url': origin['url']}) + url = reverse("api-1-origin", url_args={"origin_url": origin["url"]}) rv = api_client.get(url) expected_origin = archive_data.origin_get(origin) @@ -379,298 +388,321 @@ expected_origin = enrich_origin(expected_origin, rv.wsgi_request) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == expected_origin @given(new_origin()) def test_api_origin_not_found(api_client, new_origin): - url = reverse('api-1-origin', - url_args={'origin_url': new_origin.url}) + url = reverse("api-1-origin", url_args={"origin_url": new_origin.url}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Origin with url %s not found!' 
% new_origin.url + "exception": "NotFoundExc", + "reason": "Origin with url %s not found!" % new_origin.url, } -@pytest.mark.parametrize('backend', ['swh-search', 'swh-storage']) +@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search(api_client, mocker, backend): - if backend != 'swh-search': + if backend != "swh-search": # equivalent to not configuring search in the config - mocker.patch('swh.web.common.service.search', None) + mocker.patch("swh.web.common.service.search", None) expected_origins = { - 'https://github.com/wcoder/highlightjs-line-numbers.js', - 'https://github.com/memononen/libtess2', + "https://github.com/wcoder/highlightjs-line-numbers.js", + "https://github.com/memononen/libtess2", } # Search for 'github.com', get only one - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'github.com'}, - query_params={'limit': 1}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "github.com"}, + query_params={"limit": 1}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1 - assert {origin['url'] for origin in rv.data} <= expected_origins + assert {origin["url"] for origin in rv.data} <= expected_origins # Search for 'github.com', get all - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'github.com'}, - query_params={'limit': 2}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "github.com"}, + query_params={"limit": 2}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - assert {origin['url'] for origin in rv.data} == expected_origins + assert rv["Content-Type"] == "application/json" + assert {origin["url"] for origin in rv.data} == expected_origins # Search for 'github.com', get more than available - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'github.com'}, - query_params={'limit': 10}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "github.com"}, + query_params={"limit": 10}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - assert {origin['url'] for origin in rv.data} == expected_origins + assert rv["Content-Type"] == "application/json" + assert {origin["url"] for origin in rv.data} == expected_origins -@pytest.mark.parametrize('backend', ['swh-search', 'swh-storage']) +@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search_words(api_client, mocker, backend): - if backend != 'swh-search': + if backend != "swh-search": # equivalent to not configuring search in the config - mocker.patch('swh.web.common.service.search', None) + mocker.patch("swh.web.common.service.search", None) expected_origins = { - 'https://github.com/wcoder/highlightjs-line-numbers.js', - 'https://github.com/memononen/libtess2', + "https://github.com/wcoder/highlightjs-line-numbers.js", + "https://github.com/memononen/libtess2", } - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'github com'}, - query_params={'limit': 2}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "github com"}, + query_params={"limit": 2}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - assert {origin['url'] for origin in rv.data} == expected_origins + assert 
rv["Content-Type"] == "application/json" + assert {origin["url"] for origin in rv.data} == expected_origins - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'com github'}, - query_params={'limit': 2}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "com github"}, + query_params={"limit": 2}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - assert {origin['url'] for origin in rv.data} == expected_origins + assert rv["Content-Type"] == "application/json" + assert {origin["url"] for origin in rv.data} == expected_origins - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'memononen libtess2'}, - query_params={'limit': 2}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "memononen libtess2"}, + query_params={"limit": 2}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1 - assert {origin['url'] for origin in rv.data} \ - == {'https://github.com/memononen/libtess2'} + assert {origin["url"] for origin in rv.data} == { + "https://github.com/memononen/libtess2" + } - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'libtess2 memononen'}, - query_params={'limit': 2}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "libtess2 memononen"}, + query_params={"limit": 2}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1 - assert {origin['url'] for origin in rv.data} \ - == {'https://github.com/memononen/libtess2'} + assert {origin["url"] for origin in rv.data} == { + "https://github.com/memononen/libtess2" + } -@pytest.mark.parametrize('backend', ['swh-search', 'swh-storage']) -@pytest.mark.parametrize('limit', [1, 2, 3, 10]) -def test_api_origin_search_scroll( - api_client, archive_data, mocker, limit, backend): +@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) +@pytest.mark.parametrize("limit", [1, 2, 3, 10]) +def test_api_origin_search_scroll(api_client, archive_data, mocker, limit, backend): - if backend != 'swh-search': + if backend != "swh-search": # equivalent to not configuring search in the config - mocker.patch('swh.web.common.service.search', None) + mocker.patch("swh.web.common.service.search", None) expected_origins = { - 'https://github.com/wcoder/highlightjs-line-numbers.js', - 'https://github.com/memononen/libtess2', + "https://github.com/wcoder/highlightjs-line-numbers.js", + "https://github.com/memononen/libtess2", } - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'github.com'}, - query_params={'limit': limit}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "github.com"}, + query_params={"limit": limit}, + ) results = _scroll_results(api_client, url) - assert {origin['url'] for origin in results} == expected_origins + assert {origin["url"] for origin in results} == expected_origins -@pytest.mark.parametrize('backend', ['swh-search', 'swh-storage']) -def test_api_origin_search_limit( - api_client, archive_data, tests_data, mocker, backend): - if backend == 'swh-search': - tests_data['search'].origin_update([ - {'url': 'http://foobar/{}'.format(i)} - for i in range(2000) - ]) +@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) +def 
test_api_origin_search_limit(api_client, archive_data, tests_data, mocker, backend): + if backend == "swh-search": + tests_data["search"].origin_update( + [{"url": "http://foobar/{}".format(i)} for i in range(2000)] + ) else: # equivalent to not configuring search in the config - mocker.patch('swh.web.common.service.search', None) + mocker.patch("swh.web.common.service.search", None) - archive_data.origin_add([ - Origin(url='http://foobar/{}'.format(i)) - for i in range(2000) - ]) + archive_data.origin_add( + [Origin(url="http://foobar/{}".format(i)) for i in range(2000)] + ) - url = reverse('api-1-origin-search', - url_args={'url_pattern': 'foobar'}, - query_params={'limit': 1050}) + url = reverse( + "api-1-origin-search", + url_args={"url_pattern": "foobar"}, + query_params={"limit": 1050}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1000 @given(origin()) def test_api_origin_metadata_search(api_client, mocker, origin): - mock_idx_storage = mocker.patch('swh.web.common.service.idx_storage') + mock_idx_storage = mocker.patch("swh.web.common.service.idx_storage") oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext - oimsft.side_effect = lambda conjunction, limit: [{ - 'from_revision': ( - b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed ' - b'\xf2U\xfa\x05B8'), - 'metadata': {'author': 'Jane Doe'}, - 'id': origin['url'], - 'tool': { - 'configuration': { - 'context': ['NpmMapping', 'CodemetaMapping'], - 'type': 'local' + oimsft.side_effect = lambda conjunction, limit: [ + { + "from_revision": ( + b"p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed " b"\xf2U\xfa\x05B8" + ), + "metadata": {"author": "Jane Doe"}, + "id": origin["url"], + "tool": { + "configuration": { + "context": ["NpmMapping", "CodemetaMapping"], + "type": "local", + }, + "id": 3, + "name": "swh-metadata-detector", + "version": "0.0.1", }, - 'id': 3, - 'name': 'swh-metadata-detector', - 'version': '0.0.1' } - }] + ] - url = reverse('api-1-origin-metadata-search', - query_params={'fulltext': 'Jane Doe'}) + url = reverse("api-1-origin-metadata-search", query_params={"fulltext": "Jane Doe"}) rv = api_client.get(url) assert rv.status_code == 200, rv.content - assert rv['Content-Type'] == 'application/json' - expected_data = [{ - 'url': origin['url'], - 'metadata': { - 'metadata': {'author': 'Jane Doe'}, - 'from_revision': ( - '7026b7c1a2af56521e951c01ed20f255fa054238'), - 'tool': { - 'configuration': { - 'context': ['NpmMapping', 'CodemetaMapping'], - 'type': 'local' + assert rv["Content-Type"] == "application/json" + expected_data = [ + { + "url": origin["url"], + "metadata": { + "metadata": {"author": "Jane Doe"}, + "from_revision": ("7026b7c1a2af56521e951c01ed20f255fa054238"), + "tool": { + "configuration": { + "context": ["NpmMapping", "CodemetaMapping"], + "type": "local", + }, + "id": 3, + "name": "swh-metadata-detector", + "version": "0.0.1", }, - 'id': 3, - 'name': 'swh-metadata-detector', - 'version': '0.0.1', - } + }, } - }] + ] assert rv.data == expected_data - oimsft.assert_called_with(conjunction=['Jane Doe'], limit=70) + oimsft.assert_called_with(conjunction=["Jane Doe"], limit=70) @given(origin()) def test_api_origin_metadata_search_limit(api_client, mocker, origin): - mock_idx_storage = mocker.patch('swh.web.common.service.idx_storage') + mock_idx_storage = mocker.patch("swh.web.common.service.idx_storage") oimsft = 
mock_idx_storage.origin_intrinsic_metadata_search_fulltext - oimsft.side_effect = lambda conjunction, limit: [{ - 'from_revision': ( - b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed ' - b'\xf2U\xfa\x05B8'), - 'metadata': {'author': 'Jane Doe'}, - 'id': origin['url'], - 'tool': { - 'configuration': { - 'context': ['NpmMapping', 'CodemetaMapping'], - 'type': 'local' + oimsft.side_effect = lambda conjunction, limit: [ + { + "from_revision": ( + b"p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed " b"\xf2U\xfa\x05B8" + ), + "metadata": {"author": "Jane Doe"}, + "id": origin["url"], + "tool": { + "configuration": { + "context": ["NpmMapping", "CodemetaMapping"], + "type": "local", + }, + "id": 3, + "name": "swh-metadata-detector", + "version": "0.0.1", }, - 'id': 3, - 'name': 'swh-metadata-detector', - 'version': '0.0.1' } - }] + ] - url = reverse('api-1-origin-metadata-search', - query_params={'fulltext': 'Jane Doe'}) + url = reverse("api-1-origin-metadata-search", query_params={"fulltext": "Jane Doe"}) rv = api_client.get(url) assert rv.status_code == 200, rv.content - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1 - oimsft.assert_called_with(conjunction=['Jane Doe'], limit=70) + oimsft.assert_called_with(conjunction=["Jane Doe"], limit=70) - url = reverse('api-1-origin-metadata-search', - query_params={'fulltext': 'Jane Doe', - 'limit': 10}) + url = reverse( + "api-1-origin-metadata-search", + query_params={"fulltext": "Jane Doe", "limit": 10}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.content - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1 - oimsft.assert_called_with(conjunction=['Jane Doe'], limit=10) + oimsft.assert_called_with(conjunction=["Jane Doe"], limit=10) - url = reverse('api-1-origin-metadata-search', - query_params={'fulltext': 'Jane Doe', - 'limit': 987}) + url = reverse( + "api-1-origin-metadata-search", + query_params={"fulltext": "Jane Doe", "limit": 987}, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.content - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert len(rv.data) == 1 - oimsft.assert_called_with(conjunction=['Jane Doe'], limit=100) + oimsft.assert_called_with(conjunction=["Jane Doe"], limit=100) @given(origin()) def test_api_origin_intrinsic_metadata(api_client, mocker, origin): - mock_idx_storage = mocker.patch('swh.web.common.service.idx_storage') + mock_idx_storage = mocker.patch("swh.web.common.service.idx_storage") oimg = mock_idx_storage.origin_intrinsic_metadata_get - oimg.side_effect = lambda origin_urls: [{ - 'from_revision': ( - b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed ' - b'\xf2U\xfa\x05B8'), - 'metadata': {'author': 'Jane Doe'}, - 'id': origin['url'], - 'tool': { - 'configuration': { - 'context': ['NpmMapping', 'CodemetaMapping'], - 'type': 'local' + oimg.side_effect = lambda origin_urls: [ + { + "from_revision": ( + b"p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed " b"\xf2U\xfa\x05B8" + ), + "metadata": {"author": "Jane Doe"}, + "id": origin["url"], + "tool": { + "configuration": { + "context": ["NpmMapping", "CodemetaMapping"], + "type": "local", + }, + "id": 3, + "name": "swh-metadata-detector", + "version": "0.0.1", }, - 'id': 3, - 'name': 'swh-metadata-detector', - 'version': '0.0.1' } - }] + ] - url = reverse('api-origin-intrinsic-metadata', - url_args={'origin_url': origin['url']}) + url = reverse( + 
"api-origin-intrinsic-metadata", url_args={"origin_url": origin["url"]} + ) rv = api_client.get(url) - oimg.assert_called_once_with([origin['url']]) + oimg.assert_called_once_with([origin["url"]]) assert rv.status_code == 200, rv.content - assert rv['Content-Type'] == 'application/json' - expected_data = {'author': 'Jane Doe'} + assert rv["Content-Type"] == "application/json" + expected_data = {"author": "Jane Doe"} assert rv.data == expected_data def test_api_origin_metadata_search_invalid(api_client, mocker): - mock_idx_storage = mocker.patch('swh.web.common.service.idx_storage') - url = reverse('api-1-origin-metadata-search') + mock_idx_storage = mocker.patch("swh.web.common.service.idx_storage") + url = reverse("api-1-origin-metadata-search") rv = api_client.get(url) assert rv.status_code == 400, rv.content diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py --- a/swh/web/tests/api/views/test_origin_save.py +++ b/swh/web/tests/api/views/test_origin_save.py @@ -10,13 +10,18 @@ from swh.web.common.utils import reverse from swh.web.common.models import ( - SaveUnauthorizedOrigin, SaveOriginRequest, - SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, - SAVE_REQUEST_PENDING + SaveUnauthorizedOrigin, + SaveOriginRequest, + SAVE_REQUEST_ACCEPTED, + SAVE_REQUEST_REJECTED, + SAVE_REQUEST_PENDING, ) from swh.web.common.models import ( - SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, - SAVE_TASK_SCHEDULED, SAVE_TASK_FAILED, SAVE_TASK_SUCCEED + SAVE_TASK_NOT_CREATED, + SAVE_TASK_NOT_YET_SCHEDULED, + SAVE_TASK_SCHEDULED, + SAVE_TASK_FAILED, + SAVE_TASK_SUCCEED, ) pytestmark = pytest.mark.django_db @@ -24,120 +29,121 @@ @pytest.fixture(autouse=True) def populated_db(): - SaveUnauthorizedOrigin.objects.create( - url='https://github.com/user/illegal_repo') - SaveUnauthorizedOrigin.objects.create( - url='https://gitlab.com/user_to_exclude') + SaveUnauthorizedOrigin.objects.create(url="https://github.com/user/illegal_repo") + SaveUnauthorizedOrigin.objects.create(url="https://gitlab.com/user_to_exclude") def test_invalid_visit_type(api_client): - url = reverse('api-1-save-origin', - url_args={'visit_type': 'foo', - 'origin_url': 'https://github.com/torvalds/linux'}) + url = reverse( + "api-1-save-origin", + url_args={ + "visit_type": "foo", + "origin_url": "https://github.com/torvalds/linux", + }, + ) response = api_client.post(url) assert response.status_code == 400 def test_invalid_origin_url(api_client): - url = reverse('api-1-save-origin', - url_args={'visit_type': 'git', - 'origin_url': 'bar'}) + url = reverse( + "api-1-save-origin", url_args={"visit_type": "git", "origin_url": "bar"} + ) response = api_client.post(url) assert response.status_code == 400 -def check_created_save_request_status(api_client, mocker, origin_url, - scheduler_task_status, - expected_request_status, - expected_task_status=None, - visit_date=None): +def check_created_save_request_status( + api_client, + mocker, + origin_url, + scheduler_task_status, + expected_request_status, + expected_task_status=None, + visit_date=None, +): - mock_scheduler = mocker.patch('swh.web.common.origin_save.scheduler') + mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") if not scheduler_task_status: mock_scheduler.get_tasks.return_value = [] else: - mock_scheduler.get_tasks.return_value = [{ - 'priority': 'high', - 'policy': 'oneshot', - 'type': 'load-git', - 'arguments': { - 'kwargs': { - 'repo_url': origin_url - }, - 'args': [] - }, - 'status': scheduler_task_status, - 
'id': 1, - }] - - mock_scheduler.create_tasks.return_value = [{ - 'priority': 'high', - 'policy': 'oneshot', - 'type': 'load-git', - 'arguments': { - 'kwargs': { - 'repo_url': origin_url - }, - 'args': [] - }, - 'status': 'next_run_not_scheduled', - 'id': 1, - }] - - url = reverse('api-1-save-origin', - url_args={'visit_type': 'git', - 'origin_url': origin_url}) - - mock_visit_date = mocker.patch(('swh.web.common.origin_save.' - '_get_visit_info_for_save_request')) + mock_scheduler.get_tasks.return_value = [ + { + "priority": "high", + "policy": "oneshot", + "type": "load-git", + "arguments": {"kwargs": {"repo_url": origin_url}, "args": []}, + "status": scheduler_task_status, + "id": 1, + } + ] + + mock_scheduler.create_tasks.return_value = [ + { + "priority": "high", + "policy": "oneshot", + "type": "load-git", + "arguments": {"kwargs": {"repo_url": origin_url}, "args": []}, + "status": "next_run_not_scheduled", + "id": 1, + } + ] + + url = reverse( + "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} + ) + + mock_visit_date = mocker.patch( + ("swh.web.common.origin_save." "_get_visit_info_for_save_request") + ) mock_visit_date.return_value = (visit_date, None) response = api_client.post(url) if expected_request_status != SAVE_REQUEST_REJECTED: assert response.status_code == 200, response.data - assert (response.data['save_request_status'] == - expected_request_status) - assert response.data['save_task_status'] == expected_task_status + assert response.data["save_request_status"] == expected_request_status + assert response.data["save_task_status"] == expected_task_status else: assert response.status_code == 403, response.data -def check_save_request_status(api_client, mocker, origin_url, - expected_request_status, - expected_task_status, - scheduler_task_status='next_run_not_scheduled', - visit_date=None): - mock_scheduler = mocker.patch('swh.web.common.origin_save.scheduler') - mock_scheduler.get_tasks.return_value = [{ - 'priority': 'high', - 'policy': 'oneshot', - 'type': 'load-git', - 'arguments': { - 'kwargs': { - 'repo_url': origin_url - }, - 'args': [] - }, - 'status': scheduler_task_status, - 'id': 1, - }] - - url = reverse('api-1-save-origin', - url_args={'visit_type': 'git', - 'origin_url': origin_url}) - - mock_visit_date = mocker.patch(('swh.web.common.origin_save.' - '_get_visit_info_for_save_request')) +def check_save_request_status( + api_client, + mocker, + origin_url, + expected_request_status, + expected_task_status, + scheduler_task_status="next_run_not_scheduled", + visit_date=None, +): + mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") + mock_scheduler.get_tasks.return_value = [ + { + "priority": "high", + "policy": "oneshot", + "type": "load-git", + "arguments": {"kwargs": {"repo_url": origin_url}, "args": []}, + "status": scheduler_task_status, + "id": 1, + } + ] + + url = reverse( + "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} + ) + + mock_visit_date = mocker.patch( + ("swh.web.common.origin_save." 
"_get_visit_info_for_save_request") + ) mock_visit_date.return_value = (visit_date, None) response = api_client.get(url) assert response.status_code == 200, response.data save_request_data = response.data[0] - assert (save_request_data['save_request_status'] == - expected_request_status) - assert save_request_data['save_task_status'] == expected_task_status + assert save_request_data["save_request_status"] == expected_request_status + assert save_request_data["save_task_status"] == expected_task_status # Check that save task status is still available when # the scheduler task has been archived @@ -145,107 +151,178 @@ response = api_client.get(url) assert response.status_code == 200 save_request_data = response.data[0] - assert save_request_data['save_task_status'] == expected_task_status + assert save_request_data["save_task_status"] == expected_task_status def test_save_request_rejected(api_client, mocker): - origin_url = 'https://github.com/user/illegal_repo' - check_created_save_request_status(api_client, mocker, origin_url, - None, SAVE_REQUEST_REJECTED) - check_save_request_status(api_client, mocker, origin_url, - SAVE_REQUEST_REJECTED, SAVE_TASK_NOT_CREATED) + origin_url = "https://github.com/user/illegal_repo" + check_created_save_request_status( + api_client, mocker, origin_url, None, SAVE_REQUEST_REJECTED + ) + check_save_request_status( + api_client, mocker, origin_url, SAVE_REQUEST_REJECTED, SAVE_TASK_NOT_CREATED + ) def test_save_request_pending(api_client, mocker): - origin_url = 'https://unkwownforge.com/user/repo' - check_created_save_request_status(api_client, mocker, - origin_url, None, SAVE_REQUEST_PENDING, - SAVE_TASK_NOT_CREATED) - check_save_request_status(api_client, mocker, origin_url, - SAVE_REQUEST_PENDING, SAVE_TASK_NOT_CREATED) + origin_url = "https://unkwownforge.com/user/repo" + check_created_save_request_status( + api_client, + mocker, + origin_url, + None, + SAVE_REQUEST_PENDING, + SAVE_TASK_NOT_CREATED, + ) + check_save_request_status( + api_client, mocker, origin_url, SAVE_REQUEST_PENDING, SAVE_TASK_NOT_CREATED + ) def test_save_request_succeed(api_client, mocker): - origin_url = 'https://github.com/Kitware/CMake' - check_created_save_request_status(api_client, mocker, origin_url, - None, SAVE_REQUEST_ACCEPTED, - SAVE_TASK_NOT_YET_SCHEDULED) - check_save_request_status(api_client, mocker, origin_url, - SAVE_REQUEST_ACCEPTED, SAVE_TASK_SCHEDULED, - scheduler_task_status='next_run_scheduled') - check_save_request_status(api_client, mocker, origin_url, - SAVE_REQUEST_ACCEPTED, SAVE_TASK_SUCCEED, - scheduler_task_status='completed', - visit_date=None) + origin_url = "https://github.com/Kitware/CMake" + check_created_save_request_status( + api_client, + mocker, + origin_url, + None, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED, + ) + check_save_request_status( + api_client, + mocker, + origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SCHEDULED, + scheduler_task_status="next_run_scheduled", + ) + check_save_request_status( + api_client, + mocker, + origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SUCCEED, + scheduler_task_status="completed", + visit_date=None, + ) visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1) - check_save_request_status(api_client, mocker, origin_url, - SAVE_REQUEST_ACCEPTED, SAVE_TASK_SUCCEED, - scheduler_task_status='completed', - visit_date=visit_date) + check_save_request_status( + api_client, + mocker, + origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SUCCEED, + scheduler_task_status="completed", + 
visit_date=visit_date, + ) def test_save_request_failed(api_client, mocker): - origin_url = 'https://gitlab.com/inkscape/inkscape' - check_created_save_request_status(api_client, mocker, origin_url, - None, SAVE_REQUEST_ACCEPTED, - SAVE_TASK_NOT_YET_SCHEDULED) - check_save_request_status(api_client, mocker, origin_url, - SAVE_REQUEST_ACCEPTED, SAVE_TASK_SCHEDULED, - scheduler_task_status='next_run_scheduled') - check_save_request_status(api_client, mocker, origin_url, - SAVE_REQUEST_ACCEPTED, SAVE_TASK_FAILED, - scheduler_task_status='disabled') + origin_url = "https://gitlab.com/inkscape/inkscape" + check_created_save_request_status( + api_client, + mocker, + origin_url, + None, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED, + ) + check_save_request_status( + api_client, + mocker, + origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SCHEDULED, + scheduler_task_status="next_run_scheduled", + ) + check_save_request_status( + api_client, + mocker, + origin_url, + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_FAILED, + scheduler_task_status="disabled", + ) def test_create_save_request_only_when_needed(api_client, mocker): - origin_url = 'https://github.com/webpack/webpack' - SaveOriginRequest.objects.create(visit_type='git', origin_url=origin_url, - status=SAVE_REQUEST_ACCEPTED, - loading_task_id=56) - - check_created_save_request_status(api_client, mocker, origin_url, - 'next_run_not_scheduled', - SAVE_REQUEST_ACCEPTED, - SAVE_TASK_NOT_YET_SCHEDULED) - - sors = list(SaveOriginRequest.objects.filter(visit_type='git', - origin_url=origin_url)) + origin_url = "https://github.com/webpack/webpack" + SaveOriginRequest.objects.create( + visit_type="git", + origin_url=origin_url, + status=SAVE_REQUEST_ACCEPTED, + loading_task_id=56, + ) + + check_created_save_request_status( + api_client, + mocker, + origin_url, + "next_run_not_scheduled", + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED, + ) + + sors = list( + SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) + ) assert len(sors) == 1 - check_created_save_request_status(api_client, mocker, origin_url, - 'next_run_scheduled', - SAVE_REQUEST_ACCEPTED, - SAVE_TASK_SCHEDULED) - sors = list(SaveOriginRequest.objects.filter(visit_type='git', - origin_url=origin_url)) + check_created_save_request_status( + api_client, + mocker, + origin_url, + "next_run_scheduled", + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_SCHEDULED, + ) + sors = list( + SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) + ) assert len(sors) == 1 visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1) - check_created_save_request_status(api_client, mocker, origin_url, - 'completed', SAVE_REQUEST_ACCEPTED, - SAVE_TASK_NOT_YET_SCHEDULED, - visit_date=visit_date) - sors = list(SaveOriginRequest.objects.filter(visit_type='git', - origin_url=origin_url)) + check_created_save_request_status( + api_client, + mocker, + origin_url, + "completed", + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED, + visit_date=visit_date, + ) + sors = list( + SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) + ) assert len(sors) == 2 - check_created_save_request_status(api_client, mocker, origin_url, - 'disabled', SAVE_REQUEST_ACCEPTED, - SAVE_TASK_NOT_YET_SCHEDULED) - sors = list(SaveOriginRequest.objects.filter(visit_type='git', - origin_url=origin_url)) + check_created_save_request_status( + api_client, + mocker, + origin_url, + "disabled", + SAVE_REQUEST_ACCEPTED, + SAVE_TASK_NOT_YET_SCHEDULED, + ) + sors = list( + 
SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) + ) assert len(sors) == 3 def test_get_save_requests_unknown_origin(api_client): - unknown_origin_url = 'https://gitlab.com/foo/bar' - url = reverse('api-1-save-origin', - url_args={'visit_type': 'git', - 'origin_url': unknown_origin_url}) + unknown_origin_url = "https://gitlab.com/foo/bar" + url = reverse( + "api-1-save-origin", + url_args={"visit_type": "git", "origin_url": unknown_origin_url}, + ) response = api_client.get(url) assert response.status_code == 404 assert response.data == { - 'exception': 'NotFoundExc', - 'reason': ('No save requests found for visit of type ' - 'git on origin with url %s.') % unknown_origin_url + "exception": "NotFoundExc", + "reason": ( + "No save requests found for visit of type " "git on origin with url %s." + ) + % unknown_origin_url, } diff --git a/swh/web/tests/api/views/test_ping.py b/swh/web/tests/api/views/test_ping.py --- a/swh/web/tests/api/views/test_ping.py +++ b/swh/web/tests/api/views/test_ping.py @@ -7,10 +7,10 @@ def test_api_1_ping(api_client): - url = reverse('api-1-ping') + url = reverse("api-1-ping") rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' - assert rv.data == 'pong' + assert rv["Content-Type"] == "application/json" + assert rv.data == "pong" diff --git a/swh/web/tests/api/views/test_release.py b/swh/web/tests/api/views/test_release.py --- a/swh/web/tests/api/views/test_release.py +++ b/swh/web/tests/api/views/test_release.py @@ -8,113 +8,121 @@ from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model.model import ( - ObjectType, Person, Release, Timestamp, TimestampWithTimezone + ObjectType, + Person, + Release, + Timestamp, + TimestampWithTimezone, ) from swh.web.common.utils import reverse from swh.web.tests.data import random_sha1 -from swh.web.tests.strategies import ( - release, content, directory -) +from swh.web.tests.strategies import release, content, directory @given(release()) def test_api_release(api_client, archive_data, release): - url = reverse('api-1-release', url_args={'sha1_git': release}) + url = reverse("api-1-release", url_args={"sha1_git": release}) rv = api_client.get(url) expected_release = archive_data.release_get(release) - target_revision = expected_release['target'] - target_url = reverse('api-1-revision', - url_args={'sha1_git': target_revision}, - request=rv.wsgi_request) - expected_release['target_url'] = target_url + target_revision = expected_release["target"] + target_url = reverse( + "api-1-revision", + url_args={"sha1_git": target_revision}, + request=rv.wsgi_request, + ) + expected_release["target_url"] = target_url assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == expected_release @given(content(), directory(), release()) -def test_api_release_target_type_not_a_revision(api_client, archive_data, - content, directory, release): - for target_type, target in ((ObjectType.CONTENT, content), - (ObjectType.DIRECTORY, directory), - (ObjectType.RELEASE, release)): +def test_api_release_target_type_not_a_revision( + api_client, archive_data, content, directory, release +): + for target_type, target in ( + (ObjectType.CONTENT, content), + (ObjectType.DIRECTORY, directory), + (ObjectType.RELEASE, release), + ): if target_type == ObjectType.CONTENT: - target = target['sha1_git'] + target = target["sha1_git"] sample_release = Release( 
author=Person( - email=b'author@company.org', - fullname=b'author ', - name=b'author' + email=b"author@company.org", + fullname=b"author ", + name=b"author", ), date=TimestampWithTimezone( timestamp=Timestamp( - seconds=int(datetime.now().timestamp()), - microseconds=0), + seconds=int(datetime.now().timestamp()), microseconds=0 + ), offset=0, negative_utc=False, ), - message=b'sample release message', - name=b'sample release', + message=b"sample release message", + name=b"sample release", synthetic=False, target=hash_to_bytes(target), - target_type=target_type + target_type=target_type, ) archive_data.release_add([sample_release]) new_release_id = hash_to_hex(sample_release.id) - url = reverse('api-1-release', - url_args={'sha1_git': new_release_id}) + url = reverse("api-1-release", url_args={"sha1_git": new_release_id}) rv = api_client.get(url) expected_release = archive_data.release_get(new_release_id) if target_type == ObjectType.CONTENT: - url_args = {'q': 'sha1_git:%s' % target} + url_args = {"q": "sha1_git:%s" % target} else: - url_args = {'sha1_git': target} + url_args = {"sha1_git": target} - target_url = reverse('api-1-%s' % target_type.value, - url_args=url_args, - request=rv.wsgi_request) - expected_release['target_url'] = target_url + target_url = reverse( + "api-1-%s" % target_type.value, url_args=url_args, request=rv.wsgi_request + ) + expected_release["target_url"] = target_url assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == expected_release def test_api_release_not_found(api_client): unknown_release_ = random_sha1() - url = reverse('api-1-release', url_args={'sha1_git': unknown_release_}) + url = reverse("api-1-release", url_args={"sha1_git": unknown_release_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Release with sha1_git %s not found.' % unknown_release_ + "exception": "NotFoundExc", + "reason": "Release with sha1_git %s not found." 
% unknown_release_, } @given(release()) def test_api_release_uppercase(api_client, release): - url = reverse('api-1-release-uppercase-checksum', - url_args={'sha1_git': release.upper()}) + url = reverse( + "api-1-release-uppercase-checksum", url_args={"sha1_git": release.upper()} + ) resp = api_client.get(url) assert resp.status_code == 302 - redirect_url = reverse('api-1-release-uppercase-checksum', - url_args={'sha1_git': release}) + redirect_url = reverse( + "api-1-release-uppercase-checksum", url_args={"sha1_git": release} + ) - assert resp['location'] == redirect_url + assert resp["location"] == redirect_url diff --git a/swh/web/tests/api/views/test_revision.py b/swh/web/tests/api/views/test_revision.py --- a/swh/web/tests/api/views/test_revision.py +++ b/swh/web/tests/api/views/test_revision.py @@ -14,7 +14,7 @@ @given(revision()) def test_api_revision(api_client, archive_data, revision): - url = reverse('api-1-revision', url_args={'sha1_git': revision}) + url = reverse("api-1-revision", url_args={"sha1_git": revision}) rv = api_client.get(url) expected_revision = archive_data.revision_get(revision) @@ -22,50 +22,49 @@ enrich_revision(expected_revision, rv.wsgi_request) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == expected_revision def test_api_revision_not_found(api_client): unknown_revision_ = random_sha1() - url = reverse('api-1-revision', - url_args={'sha1_git': unknown_revision_}) + url = reverse("api-1-revision", url_args={"sha1_git": unknown_revision_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Revision with sha1_git %s not found.' % unknown_revision_ + "exception": "NotFoundExc", + "reason": "Revision with sha1_git %s not found." % unknown_revision_, } @given(revision()) def test_api_revision_raw_ok(api_client, archive_data, revision): - url = reverse('api-1-revision-raw-message', - url_args={'sha1_git': revision}) + url = reverse("api-1-revision-raw-message", url_args={"sha1_git": revision}) rv = api_client.get(url) - expected_message = archive_data.revision_get(revision)['message'] + expected_message = archive_data.revision_get(revision)["message"] assert rv.status_code == 200 - assert rv['Content-Type'] == 'application/octet-stream' + assert rv["Content-Type"] == "application/octet-stream" assert rv.content == expected_message.encode() def test_api_revision_raw_ko_no_rev(api_client): unknown_revision_ = random_sha1() - url = reverse('api-1-revision-raw-message', - url_args={'sha1_git': unknown_revision_}) + url = reverse( + "api-1-revision-raw-message", url_args={"sha1_git": unknown_revision_} + ) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Revision with sha1_git %s not found.' % unknown_revision_ + "exception": "NotFoundExc", + "reason": "Revision with sha1_git %s not found." 
% unknown_revision_, } @@ -73,187 +72,193 @@ def test_api_revision_log(api_client, archive_data, revision): per_page = 10 - url = reverse('api-1-revision-log', url_args={'sha1_git': revision}, - query_params={'per_page': per_page}) + url = reverse( + "api-1-revision-log", + url_args={"sha1_git": revision}, + query_params={"per_page": per_page}, + ) rv = api_client.get(url) - expected_log = archive_data.revision_log(revision, limit=per_page+1) - expected_log = list(map(enrich_revision, expected_log, - [rv.wsgi_request] * len(expected_log))) + expected_log = archive_data.revision_log(revision, limit=per_page + 1) + expected_log = list( + map(enrich_revision, expected_log, [rv.wsgi_request] * len(expected_log)) + ) has_next = len(expected_log) > per_page assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == (expected_log[:-1] if has_next else expected_log) if has_next: - assert 'Link' in rv + assert "Link" in rv next_log_url = rv.wsgi_request.build_absolute_uri( - reverse('api-1-revision-log', - url_args={'sha1_git': expected_log[-1]['id']}, - query_params={'per_page': per_page})) - assert next_log_url in rv['Link'] + reverse( + "api-1-revision-log", + url_args={"sha1_git": expected_log[-1]["id"]}, + query_params={"per_page": per_page}, + ) + ) + assert next_log_url in rv["Link"] def test_api_revision_log_not_found(api_client): unknown_revision_ = random_sha1() - url = reverse('api-1-revision-log', - url_args={'sha1_git': unknown_revision_}) + url = reverse("api-1-revision-log", url_args={"sha1_git": unknown_revision_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Revision with sha1_git %s not found.' % unknown_revision_ + "exception": "NotFoundExc", + "reason": "Revision with sha1_git %s not found." 
% unknown_revision_, } - assert not rv.has_header('Link') + assert not rv.has_header("Link") @given(revision()) def test_api_revision_log_context(api_client, archive_data, revision): revisions = archive_data.revision_log(revision, limit=4) - prev_rev = revisions[0]['id'] - rev = revisions[-1]['id'] + prev_rev = revisions[0]["id"] + rev = revisions[-1]["id"] per_page = 10 - url = reverse('api-1-revision-log', - url_args={'sha1_git': rev, - 'prev_sha1s': prev_rev}, - query_params={'per_page': per_page}) + url = reverse( + "api-1-revision-log", + url_args={"sha1_git": rev, "prev_sha1s": prev_rev}, + query_params={"per_page": per_page}, + ) rv = api_client.get(url) expected_log = archive_data.revision_log(rev, limit=per_page) prev_revision = archive_data.revision_get(prev_rev) expected_log.insert(0, prev_revision) - expected_log = list(map(enrich_revision, expected_log, - [rv.wsgi_request] * len(expected_log))) + expected_log = list( + map(enrich_revision, expected_log, [rv.wsgi_request] * len(expected_log)) + ) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == expected_log def test_api_revision_directory_ko_not_found(api_client, mocker): - mock_rev_dir = mocker.patch( - 'swh.web.api.views.revision._revision_directory_by') - mock_rev_dir.side_effect = NotFoundExc('Not found') + mock_rev_dir = mocker.patch("swh.web.api.views.revision._revision_directory_by") + mock_rev_dir.side_effect = NotFoundExc("Not found") - rv = api_client.get('/api/1/revision/999/directory/some/path/to/dir/') + rv = api_client.get("/api/1/revision/999/directory/some/path/to/dir/") assert rv.status_code == 404, rv.data - assert rv['Content-Type'] == 'application/json' - assert rv.data == { - 'exception': 'NotFoundExc', - 'reason': 'Not found' - } + assert rv["Content-Type"] == "application/json" + assert rv.data == {"exception": "NotFoundExc", "reason": "Not found"} mock_rev_dir.assert_called_once_with( - {'sha1_git': '999'}, - 'some/path/to/dir', - '/api/1/revision/999/directory/some/path/to/dir/', - with_data=False + {"sha1_git": "999"}, + "some/path/to/dir", + "/api/1/revision/999/directory/some/path/to/dir/", + with_data=False, ) def test_api_revision_directory_ok_returns_dir_entries(api_client, mocker): - mock_rev_dir = mocker.patch( - 'swh.web.api.views.revision._revision_directory_by') + mock_rev_dir = mocker.patch("swh.web.api.views.revision._revision_directory_by") stub_dir = { - 'type': 'dir', - 'revision': '999', - 'content': [ + "type": "dir", + "revision": "999", + "content": [ { - 'sha1_git': '789', - 'type': 'file', - 'target': '101', - 'target_url': '/api/1/content/sha1_git:101/', - 'name': 'somefile', - 'file_url': '/api/1/revision/999/directory/some/path/' - 'somefile/' + "sha1_git": "789", + "type": "file", + "target": "101", + "target_url": "/api/1/content/sha1_git:101/", + "name": "somefile", + "file_url": "/api/1/revision/999/directory/some/path/" "somefile/", }, { - 'sha1_git': '123', - 'type': 'dir', - 'target': '456', - 'target_url': '/api/1/directory/456/', - 'name': 'to-subdir', - 'dir_url': '/api/1/revision/999/directory/some/path/' - 'to-subdir/', - } - ] + "sha1_git": "123", + "type": "dir", + "target": "456", + "target_url": "/api/1/directory/456/", + "name": "to-subdir", + "dir_url": "/api/1/revision/999/directory/some/path/" "to-subdir/", + }, + ], } mock_rev_dir.return_value = stub_dir - rv = api_client.get('/api/1/revision/999/directory/some/path/') + rv = 
api_client.get("/api/1/revision/999/directory/some/path/") - stub_dir['content'][0]['target_url'] = rv.wsgi_request.build_absolute_uri( - stub_dir['content'][0]['target_url']) - stub_dir['content'][0]['file_url'] = rv.wsgi_request.build_absolute_uri( - stub_dir['content'][0]['file_url']) - stub_dir['content'][1]['target_url'] = rv.wsgi_request.build_absolute_uri( - stub_dir['content'][1]['target_url']) - stub_dir['content'][1]['dir_url'] = rv.wsgi_request.build_absolute_uri( - stub_dir['content'][1]['dir_url']) + stub_dir["content"][0]["target_url"] = rv.wsgi_request.build_absolute_uri( + stub_dir["content"][0]["target_url"] + ) + stub_dir["content"][0]["file_url"] = rv.wsgi_request.build_absolute_uri( + stub_dir["content"][0]["file_url"] + ) + stub_dir["content"][1]["target_url"] = rv.wsgi_request.build_absolute_uri( + stub_dir["content"][1]["target_url"] + ) + stub_dir["content"][1]["dir_url"] = rv.wsgi_request.build_absolute_uri( + stub_dir["content"][1]["dir_url"] + ) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == stub_dir mock_rev_dir.assert_called_once_with( - {'sha1_git': '999'}, - 'some/path', - '/api/1/revision/999/directory/some/path/', - with_data=False + {"sha1_git": "999"}, + "some/path", + "/api/1/revision/999/directory/some/path/", + with_data=False, ) def test_api_revision_directory_ok_returns_content(api_client, mocker): - mock_rev_dir = mocker.patch( - 'swh.web.api.views.revision._revision_directory_by') + mock_rev_dir = mocker.patch("swh.web.api.views.revision._revision_directory_by") stub_content = { - 'type': 'file', - 'revision': '999', - 'content': { - 'sha1_git': '789', - 'sha1': '101', - 'data_url': '/api/1/content/101/raw/', - } + "type": "file", + "revision": "999", + "content": { + "sha1_git": "789", + "sha1": "101", + "data_url": "/api/1/content/101/raw/", + }, } mock_rev_dir.return_value = stub_content - url = '/api/1/revision/666/directory/some/other/path/' + url = "/api/1/revision/666/directory/some/other/path/" rv = api_client.get(url) - stub_content['content']['data_url'] = rv.wsgi_request.build_absolute_uri( - stub_content['content']['data_url']) + stub_content["content"]["data_url"] = rv.wsgi_request.build_absolute_uri( + stub_content["content"]["data_url"] + ) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == stub_content mock_rev_dir.assert_called_once_with( - {'sha1_git': '666'}, 'some/other/path', url, with_data=False) + {"sha1_git": "666"}, "some/other/path", url, with_data=False + ) @given(revision()) def test_api_revision_uppercase(api_client, revision): - url = reverse('api-1-revision-uppercase-checksum', - url_args={'sha1_git': revision.upper()}) + url = reverse( + "api-1-revision-uppercase-checksum", url_args={"sha1_git": revision.upper()} + ) resp = api_client.get(url) assert resp.status_code == 302 - redirect_url = reverse('api-1-revision', - url_args={'sha1_git': revision}) + redirect_url = reverse("api-1-revision", url_args={"sha1_git": revision}) - assert resp['location'] == redirect_url + assert resp["location"] == redirect_url diff --git a/swh/web/tests/api/views/test_snapshot.py b/swh/web/tests/api/views/test_snapshot.py --- a/swh/web/tests/api/views/test_snapshot.py +++ b/swh/web/tests/api/views/test_snapshot.py @@ -12,20 +12,17 @@ from swh.web.api.utils import enrich_snapshot from swh.web.common.utils import reverse from 
swh.web.tests.data import random_sha1 -from swh.web.tests.strategies import ( - snapshot, new_snapshot -) +from swh.web.tests.strategies import snapshot, new_snapshot @given(snapshot()) def test_api_snapshot(api_client, archive_data, snapshot): - url = reverse('api-1-snapshot', - url_args={'snapshot_id': snapshot}) + url = reverse("api-1-snapshot", url_args={"snapshot_id": snapshot}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" expected_data = archive_data.snapshot_get(snapshot) expected_data = enrich_snapshot(expected_data, rv.wsgi_request) assert rv.data == expected_data @@ -39,56 +36,62 @@ snapshot_branches = [] - for k, v in sorted( - archive_data.snapshot_get(snapshot)['branches'].items()): - snapshot_branches.append({ - 'name': k, - 'target_type': v['target_type'], - 'target': v['target'] - }) + for k, v in sorted(archive_data.snapshot_get(snapshot)["branches"].items()): + snapshot_branches.append( + {"name": k, "target_type": v["target_type"], "target": v["target"]} + ) - whole_snapshot = {'id': snapshot, 'branches': {}, 'next_branch': None} + whole_snapshot = {"id": snapshot, "branches": {}, "next_branch": None} while branches_offset < len(snapshot_branches): - branches_from = snapshot_branches[branches_offset]['name'] - url = reverse('api-1-snapshot', - url_args={'snapshot_id': snapshot}, - query_params={'branches_from': branches_from, - 'branches_count': branches_count}) + branches_from = snapshot_branches[branches_offset]["name"] + url = reverse( + "api-1-snapshot", + url_args={"snapshot_id": snapshot}, + query_params={ + "branches_from": branches_from, + "branches_count": branches_count, + }, + ) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" expected_data = archive_data.snapshot_get_branches( - snapshot, branches_from, branches_count) + snapshot, branches_from, branches_count + ) expected_data = enrich_snapshot(expected_data, rv.wsgi_request) branches_offset += branches_count if branches_offset < len(snapshot_branches): - next_branch = snapshot_branches[branches_offset]['name'] - expected_data['next_branch'] = next_branch + next_branch = snapshot_branches[branches_offset]["name"] + expected_data["next_branch"] = next_branch else: - expected_data['next_branch'] = None + expected_data["next_branch"] = None assert rv.data == expected_data - whole_snapshot['branches'].update(expected_data['branches']) + whole_snapshot["branches"].update(expected_data["branches"]) if branches_offset < len(snapshot_branches): next_url = rv.wsgi_request.build_absolute_uri( - reverse('api-1-snapshot', - url_args={'snapshot_id': snapshot}, - query_params={'branches_from': next_branch, - 'branches_count': branches_count})) - assert rv['Link'] == '<%s>; rel="next"' % next_url + reverse( + "api-1-snapshot", + url_args={"snapshot_id": snapshot}, + query_params={ + "branches_from": next_branch, + "branches_count": branches_count, + }, + ) + ) + assert rv["Link"] == '<%s>; rel="next"' % next_url else: - assert not rv.has_header('Link') + assert not rv.has_header("Link") - url = reverse('api-1-snapshot', - url_args={'snapshot_id': snapshot}) + url = reverse("api-1-snapshot", url_args={"snapshot_id": snapshot}) rv = api_client.get(url) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert 
rv.data == whole_snapshot @@ -97,67 +100,66 @@ snapshot_branches = [] - for k, v in sorted( - archive_data.snapshot_get(snapshot)['branches'].items()): - snapshot_branches.append({ - 'name': k, - 'target_type': v['target_type'], - 'target': v['target'] - }) + for k, v in sorted(archive_data.snapshot_get(snapshot)["branches"].items()): + snapshot_branches.append( + {"name": k, "target_type": v["target_type"], "target": v["target"]} + ) - target_type = random.choice(snapshot_branches)['target_type'] + target_type = random.choice(snapshot_branches)["target_type"] - url = reverse('api-1-snapshot', - url_args={'snapshot_id': snapshot}, - query_params={'target_types': target_type}) + url = reverse( + "api-1-snapshot", + url_args={"snapshot_id": snapshot}, + query_params={"target_types": target_type}, + ) rv = api_client.get(url) expected_data = archive_data.snapshot_get_branches( - snapshot, target_types=target_type) + snapshot, target_types=target_type + ) expected_data = enrich_snapshot(expected_data, rv.wsgi_request) assert rv.status_code == 200, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == expected_data def test_api_snapshot_errors(api_client): unknown_snapshot_ = random_sha1() - url = reverse('api-1-snapshot', - url_args={'snapshot_id': '63ce369'}) + url = reverse("api-1-snapshot", url_args={"snapshot_id": "63ce369"}) rv = api_client.get(url) assert rv.status_code == 400, rv.data - url = reverse('api-1-snapshot', - url_args={'snapshot_id': unknown_snapshot_}) + url = reverse("api-1-snapshot", url_args={"snapshot_id": unknown_snapshot_}) rv = api_client.get(url) assert rv.status_code == 404, rv.data @given(snapshot()) def test_api_snapshot_uppercase(api_client, snapshot): - url = reverse('api-1-snapshot-uppercase-checksum', - url_args={'snapshot_id': snapshot.upper()}) + url = reverse( + "api-1-snapshot-uppercase-checksum", url_args={"snapshot_id": snapshot.upper()} + ) resp = api_client.get(url) assert resp.status_code == 302 - redirect_url = reverse('api-1-snapshot-uppercase-checksum', - url_args={'snapshot_id': snapshot}) + redirect_url = reverse( + "api-1-snapshot-uppercase-checksum", url_args={"snapshot_id": snapshot} + ) - assert resp['location'] == redirect_url + assert resp["location"] == redirect_url @given(new_snapshot(min_size=4)) def test_api_snapshot_null_branch(api_client, archive_data, new_snapshot): snp_dict = new_snapshot.to_dict() - snp_id = hash_to_hex(snp_dict['id']) - for branch in snp_dict['branches'].keys(): - snp_dict['branches'][branch] = None + snp_id = hash_to_hex(snp_dict["id"]) + for branch in snp_dict["branches"].keys(): + snp_dict["branches"][branch] = None break archive_data.snapshot_add([Snapshot.from_dict(snp_dict)]) - url = reverse('api-1-snapshot', - url_args={'snapshot_id': snp_id}) + url = reverse("api-1-snapshot", url_args={"snapshot_id": snp_id}) rv = api_client.get(url) assert rv.status_code == 200, rv.data diff --git a/swh/web/tests/api/views/test_stat.py b/swh/web/tests/api/views/test_stat.py --- a/swh/web/tests/api/views/test_stat.py +++ b/swh/web/tests/api/views/test_stat.py @@ -10,61 +10,63 @@ def test_api_1_stat_counters_raise_error(api_client, mocker): - mock_service = mocker.patch('swh.web.api.views.stat.service') + mock_service = mocker.patch("swh.web.api.views.stat.service") mock_service.stat_counters.side_effect = BadInputExc( - 'voluntary error to check the bad request middleware.') + "voluntary error to check the bad request middleware." 
+ ) - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") rv = api_client.get(url) assert rv.status_code == 400, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'BadInputExc', - 'reason': 'voluntary error to check the bad request middleware.'} + "exception": "BadInputExc", + "reason": "voluntary error to check the bad request middleware.", + } def test_api_1_stat_counters_raise_from_db(api_client, mocker): - mock_service = mocker.patch('swh.web.api.views.stat.service') + mock_service = mocker.patch("swh.web.api.views.stat.service") mock_service.stat_counters.side_effect = StorageDBError( - 'Storage exploded! Will be back online shortly!') + "Storage exploded! Will be back online shortly!" + ) - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") rv = api_client.get(url) assert rv.status_code == 503, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'StorageDBError', - 'reason': - 'An unexpected error occurred in the backend: ' - 'Storage exploded! Will be back online shortly!'} + "exception": "StorageDBError", + "reason": "An unexpected error occurred in the backend: " + "Storage exploded! Will be back online shortly!", + } def test_api_1_stat_counters_raise_from_api(api_client, mocker): - mock_service = mocker.patch('swh.web.api.views.stat.service') + mock_service = mocker.patch("swh.web.api.views.stat.service") mock_service.stat_counters.side_effect = StorageAPIError( - 'Storage API dropped dead! Will resurrect from its ashes asap!' + "Storage API dropped dead! Will resurrect from its ashes asap!" ) - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") rv = api_client.get(url) assert rv.status_code == 503, rv.data - assert rv['Content-Type'] == 'application/json' + assert rv["Content-Type"] == "application/json" assert rv.data == { - 'exception': 'StorageAPIError', - 'reason': - 'An unexpected error occurred in the api backend: ' - 'Storage API dropped dead! Will resurrect from its ashes asap!' + "exception": "StorageAPIError", + "reason": "An unexpected error occurred in the api backend: " + "Storage API dropped dead! 
Will resurrect from its ashes asap!",
     }


 def test_api_1_stat_counters(api_client, archive_data):
-    url = reverse('api-1-stat-counters')
+    url = reverse("api-1-stat-counters")
     rv = api_client.get(url)

     assert rv.status_code == 200, rv.data
-    assert rv['Content-Type'] == 'application/json'
+    assert rv["Content-Type"] == "application/json"

     assert rv.data == archive_data.stat_counters()
diff --git a/swh/web/tests/api/views/test_vault.py b/swh/web/tests/api/views/test_vault.py
--- a/swh/web/tests/api/views/test_vault.py
+++ b/swh/web/tests/api/views/test_vault.py
@@ -6,110 +6,125 @@
 from swh.model import hashutil
 from swh.web.common.utils import reverse

-TEST_OBJ_ID = 'd4905454cc154b492bd6afed48694ae3c579345e'
+TEST_OBJ_ID = "d4905454cc154b492bd6afed48694ae3c579345e"

-OBJECT_TYPES = ('directory', 'revision_gitfast')
+OBJECT_TYPES = ("directory", "revision_gitfast")


 def test_api_vault_cook(api_client, mocker):
-    mock_service = mocker.patch('swh.web.api.views.vault.service')
+    mock_service = mocker.patch("swh.web.api.views.vault.service")

     for obj_type in OBJECT_TYPES:

-        fetch_url = reverse(f'api-1-vault-fetch-{obj_type}',
-                            url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID})
+        fetch_url = reverse(
+            f"api-1-vault-fetch-{obj_type}",
+            url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID},
+        )
         stub_cook = {
-            'fetch_url': fetch_url,
-            'obj_id': TEST_OBJ_ID,
-            'obj_type': obj_type,
-            'progress_message': None,
-            'status': 'done',
-            'task_uuid': 'de75c902-5ee5-4739-996e-448376a93eff',
+            "fetch_url": fetch_url,
+            "obj_id": TEST_OBJ_ID,
+            "obj_type": obj_type,
+            "progress_message": None,
+            "status": "done",
+            "task_uuid": "de75c902-5ee5-4739-996e-448376a93eff",
         }
-        stub_fetch = b'content'
+        stub_fetch = b"content"

         mock_service.vault_cook.return_value = stub_cook
         mock_service.vault_fetch.return_value = stub_fetch

-        url = reverse(f'api-1-vault-cook-{obj_type}',
-                      url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID})
+        url = reverse(
+            f"api-1-vault-cook-{obj_type}", url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID}
+        )

-        rv = api_client.post(url, {'email': 'test@test.mail'})
+        rv = api_client.post(url, {"email": "test@test.mail"})

         assert rv.status_code == 200, rv.data
-        assert rv['Content-Type'] == 'application/json'
+        assert rv["Content-Type"] == "application/json"

-        stub_cook['fetch_url'] = rv.wsgi_request.build_absolute_uri(
-            stub_cook['fetch_url'])
+        stub_cook["fetch_url"] = rv.wsgi_request.build_absolute_uri(
+            stub_cook["fetch_url"]
+        )

         assert rv.data == stub_cook
         mock_service.vault_cook.assert_called_with(
-            obj_type,
-            hashutil.hash_to_bytes(TEST_OBJ_ID),
-            'test@test.mail')
+            obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID), "test@test.mail"
+        )

         rv = api_client.get(fetch_url)

         assert rv.status_code == 200
-        assert rv['Content-Type'] == 'application/gzip'
+        assert rv["Content-Type"] == "application/gzip"
         assert rv.content == stub_fetch
         mock_service.vault_fetch.assert_called_with(
-            obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID))
+            obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID)
+        )


 def test_api_vault_cook_uppercase_hash(api_client):

     for obj_type in OBJECT_TYPES:

-        url = reverse(f'api-1-vault-cook-{obj_type}-uppercase-checksum',
-                      url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID.upper()})
-        rv = api_client.post(url, {'email': 'test@test.mail'})
+        url = reverse(
+            f"api-1-vault-cook-{obj_type}-uppercase-checksum",
+            url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID.upper()},
+        )
+        rv = api_client.post(url, {"email": "test@test.mail"})

         assert rv.status_code == 302
-        redirect_url = reverse(f'api-1-vault-cook-{obj_type}',
-                               url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID})
+        redirect_url = reverse(
+            f"api-1-vault-cook-{obj_type}", url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID}
+        )

-        assert rv['location'] == redirect_url
+        assert rv["location"] == redirect_url

         fetch_url = reverse(
-            f'api-1-vault-fetch-{obj_type}-uppercase-checksum',
-            url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID.upper()})
+            f"api-1-vault-fetch-{obj_type}-uppercase-checksum",
+            url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID.upper()},
+        )
         rv = api_client.get(fetch_url)

         assert rv.status_code == 302
-        redirect_url = reverse(f'api-1-vault-fetch-{obj_type}',
-                               url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID})
+        redirect_url = reverse(
+            f"api-1-vault-fetch-{obj_type}",
+            url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID},
+        )

-        assert rv['location'] == redirect_url
+        assert rv["location"] == redirect_url


 def test_api_vault_cook_notfound(api_client, mocker):
-    mock_service = mocker.patch('swh.web.api.views.vault.service')
+    mock_service = mocker.patch("swh.web.api.views.vault.service")
     mock_service.vault_cook.return_value = None
     mock_service.vault_fetch.return_value = None

     for obj_type in OBJECT_TYPES:

-        url = reverse(f'api-1-vault-cook-{obj_type}',
-                      url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID})
+        url = reverse(
+            f"api-1-vault-cook-{obj_type}", url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID}
+        )

         rv = api_client.post(url)

         assert rv.status_code == 404, rv.data
-        assert rv['Content-Type'] == 'application/json'
+        assert rv["Content-Type"] == "application/json"

-        assert rv.data['exception'] == 'NotFoundExc'
+        assert rv.data["exception"] == "NotFoundExc"
         mock_service.vault_cook.assert_called_with(
-            obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID), None)
+            obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID), None
+        )

-        fetch_url = reverse(f'api-1-vault-fetch-{obj_type}',
-                            url_args={f'{obj_type[:3]}_id': TEST_OBJ_ID})
+        fetch_url = reverse(
+            f"api-1-vault-fetch-{obj_type}",
+            url_args={f"{obj_type[:3]}_id": TEST_OBJ_ID},
+        )
         rv = api_client.get(fetch_url)

         assert rv.status_code == 404, rv.data
-        assert rv['Content-Type'] == 'application/json'
-        assert rv.data['exception'] == 'NotFoundExc'
+        assert rv["Content-Type"] == "application/json"
+        assert rv.data["exception"] == "NotFoundExc"
         mock_service.vault_fetch.assert_called_with(
-            obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID))
+            obj_type, hashutil.hash_to_bytes(TEST_OBJ_ID)
+        )
diff --git a/swh/web/tests/auth/keycloak_mock.py b/swh/web/tests/auth/keycloak_mock.py
--- a/swh/web/tests/auth/keycloak_mock.py
+++ b/swh/web/tests/auth/keycloak_mock.py
@@ -16,33 +16,46 @@

 class KeycloackOpenIDConnectMock(KeycloakOpenIDConnect):
-
     def __init__(self, auth_success=True, exp=None):
         swhweb_config = get_config()
-        super().__init__(swhweb_config['keycloak']['server_url'],
-                         swhweb_config['keycloak']['realm_name'],
-                         OIDC_SWH_WEB_CLIENT_ID)
+        super().__init__(
+            swhweb_config["keycloak"]["server_url"],
+            swhweb_config["keycloak"]["realm_name"],
+            OIDC_SWH_WEB_CLIENT_ID,
+        )
         self.auth_success = auth_success
         self.exp = exp
         self._keycloak.public_key = lambda: realm_public_key
         self._keycloak.well_know = lambda: {
-            'issuer': f'{self.server_url}realms/{self.realm_name}',
-            'authorization_endpoint': (f'{self.server_url}realms/'
-                                       f'{self.realm_name}/protocol/'
-                                       'openid-connect/auth'),
-            'token_endpoint': (f'{self.server_url}realms/{self.realm_name}/'
-                               'protocol/openid-connect/token'),
-            'token_introspection_endpoint': (f'{self.server_url}realms/'
-                                             f'{self.realm_name}/protocol/'
-                                             'openid-connect/token/'
-                                             'introspect'),
-            'userinfo_endpoint': (f'{self.server_url}realms/{self.realm_name}/'
-                                  'protocol/openid-connect/userinfo'),
-            'end_session_endpoint': (f'{self.server_url}realms/'
-                                     f'{self.realm_name}/protocol/'
-                                     'openid-connect/logout'),
-            'jwks_uri': (f'{self.server_url}realms/{self.realm_name}/'
-                         'protocol/openid-connect/certs'),
+            "issuer": f"{self.server_url}realms/{self.realm_name}",
+            "authorization_endpoint": (
+                f"{self.server_url}realms/"
+                f"{self.realm_name}/protocol/"
+                "openid-connect/auth"
+            ),
+            "token_endpoint": (
+                f"{self.server_url}realms/{self.realm_name}/"
+                "protocol/openid-connect/token"
+            ),
+            "token_introspection_endpoint": (
+                f"{self.server_url}realms/"
+                f"{self.realm_name}/protocol/"
+                "openid-connect/token/"
+                "introspect"
+            ),
+            "userinfo_endpoint": (
+                f"{self.server_url}realms/{self.realm_name}/"
+                "protocol/openid-connect/userinfo"
+            ),
+            "end_session_endpoint": (
+                f"{self.server_url}realms/"
+                f"{self.realm_name}/protocol/"
+                "openid-connect/logout"
+            ),
+            "jwks_uri": (
+                f"{self.server_url}realms/{self.realm_name}/"
+                "protocol/openid-connect/certs"
+            ),
         }
         self.authorization_code = Mock()
         self.userinfo = Mock()
@@ -52,7 +65,7 @@
             self.userinfo.return_value = copy(userinfo)
         else:
             self.authorization_url = Mock()
-            exception = Exception('Authentication failed')
+            exception = Exception("Authentication failed")
             self.authorization_code.side_effect = exception
             self.authorization_url.side_effect = exception
             self.userinfo.side_effect = exception
@@ -63,24 +76,23 @@
         if self.auth_success:
             # skip signature expiration check as we use a static oidc_profile
             # for the tests with expired tokens in it
-            options['verify_exp'] = False
+            options["verify_exp"] = False
         decoded = super().decode_token(token, options)
         # tweak auth and exp time for tests
-        expire_in = decoded['exp'] - decoded['auth_time']
+        expire_in = decoded["exp"] - decoded["auth_time"]
         if self.exp is not None:
-            decoded['exp'] = self.exp
-            decoded['auth_time'] = self.exp - expire_in
+            decoded["exp"] = self.exp
+            decoded["auth_time"] = self.exp - expire_in
         else:
-            decoded['auth_time'] = int(timezone.now().timestamp())
-            decoded['exp'] = decoded['auth_time'] + expire_in
-        decoded['groups'] = ['/staff']
+            decoded["auth_time"] = int(timezone.now().timestamp())
+            decoded["exp"] = decoded["auth_time"] + expire_in
+        decoded["groups"] = ["/staff"]
         return decoded


 def mock_keycloak(mocker, auth_success=True, exp=None):
     kc_oidc_mock = KeycloackOpenIDConnectMock(auth_success, exp)
-    mock_get_oidc_client = mocker.patch(
-        'swh.web.auth.views.get_oidc_client')
+    mock_get_oidc_client = mocker.patch("swh.web.auth.views.get_oidc_client")
     mock_get_oidc_client.return_value = kc_oidc_mock
-    mocker.patch('swh.web.auth.backends._oidc_client', kc_oidc_mock)
+    mocker.patch("swh.web.auth.backends._oidc_client", kc_oidc_mock)
     return kc_oidc_mock
diff --git a/swh/web/tests/auth/sample_data.py b/swh/web/tests/auth/sample_data.py
--- a/swh/web/tests/auth/sample_data.py
+++ b/swh/web/tests/auth/sample_data.py
@@ -5,91 +5,97 @@

 realm_public_key = (
-    'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnqF4xvGjaI54P6WtJvyGayxP8A93u'
-    'NcA3TH6jitwmyAalj8dN8/NzK9vrdlSA3Ibvp/XQujPSOP7a35YiYFscEJnogTXQpE/FhZrUY'
-    'y21U6ezruVUv4z/ER1cYLb+q5ZI86nXSTNCAbH+lw7rQjlvcJ9KvgHEeA5ALXJ1r55zUmNvuy'
-    '5o6ke1G3fXbNSXwF4qlWAzo1o7Ms8qNrNyOG8FPx24dvm9xMH7/08IPvh9KUqlnP8h6olpxHr'
-    'drX/q4E+Nzj8Tr8p7Z5CimInls40QuOTIhs6C2SwFHUgQgXl9hB9umiZJlwYEpDv0/LO2zYie'
-    'Hl5Lv7Iig4FOIXIVCaDGQIDAQAB'
+    "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnqF4xvGjaI54P6WtJvyGayxP8A93u"
"NcA3TH6jitwmyAalj8dN8/NzK9vrdlSA3Ibvp/XQujPSOP7a35YiYFscEJnogTXQpE/FhZrUY" + "y21U6ezruVUv4z/ER1cYLb+q5ZI86nXSTNCAbH+lw7rQjlvcJ9KvgHEeA5ALXJ1r55zUmNvuy" + "5o6ke1G3fXbNSXwF4qlWAzo1o7Ms8qNrNyOG8FPx24dvm9xMH7/08IPvh9KUqlnP8h6olpxHr" + "drX/q4E+Nzj8Tr8p7Z5CimInls40QuOTIhs6C2SwFHUgQgXl9hB9umiZJlwYEpDv0/LO2zYie" + "Hl5Lv7Iig4FOIXIVCaDGQIDAQAB" ) oidc_profile = { - 'access_token': ('eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJPSnhV' - 'Q0p0TmJQT0NOUGFNNmc3ZU1zY2pqTXhoem9vNGxZaFhsa1c2TWhBIn0.' - 'eyJqdGkiOiIzMWZjNTBiNy1iYmU1LTRmNTEtOTFlZi04ZTNlZWM1MTMz' - 'MWUiLCJleHAiOjE1ODI3MjM3MDEsIm5iZiI6MCwiaWF0IjoxNTgyNzIz' - 'MTAxLCJpc3MiOiJodHRwOi8vbG9jYWxob3N0OjgwODAvYXV0aC9yZWFs' - 'bXMvU29mdHdhcmVIZXJpdGFnZSIsImF1ZCI6WyJzd2gtd2ViIiwiYWNj' - 'b3VudCJdLCJzdWIiOiJmZWFjZDM0NC1iNDY4LTRhNjUtYTIzNi0xNGY2' - 'MWU2YjcyMDAiLCJ0eXAiOiJCZWFyZXIiLCJhenAiOiJzd2gtd2ViIiwi' - 'YXV0aF90aW1lIjoxNTgyNzIzMTAwLCJzZXNzaW9uX3N0YXRlIjoiZDgy' - 'YjkwZDEtMGE5NC00ZTc0LWFkNjYtZGQ5NTM0MWM3YjZkIiwiYWNyIjoi' - 'MSIsImFsbG93ZWQtb3JpZ2lucyI6WyIqIl0sInJlYWxtX2FjY2VzcyI6' - 'eyJyb2xlcyI6WyJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0' - 'aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xl' - 'cyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtz' - 'Iiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgZW1haWwg' - 'cHJvZmlsZSIsImVtYWlsX3ZlcmlmaWVkIjpmYWxzZSwibmFtZSI6Ikpv' - 'aG4gRG9lIiwiZ3JvdXBzIjpbXSwicHJlZmVycmVkX3VzZXJuYW1lIjoi' - 'am9obmRvZSIsImdpdmVuX25hbWUiOiJKb2huIiwiZmFtaWx5X25hbWUi' - 'OiJEb2UiLCJlbWFpbCI6ImpvaG4uZG9lQGV4YW1wbGUuY29tIn0.neJ-' - 'Pmd87J6Gt0fzDqmXFeoy34Iqb5vNNEEgIKqtqg3moaVkbXrO_9R37DJB' - 'AgdFv0owVONK3GbqPOEICePgG6RFtri999DetNE-O5sB4fwmHPWcHPlO' - 'kcPLbVJqu6zWo-2AzlfAy5bCNvj_wzs2tjFjLeHcRgR1a1WY3uTp5EWc' - 'HITCWQZzZWFGZTZCTlGkpdyJTqxGBdSHRB4NlIVGpYSTBsBsxttFEetl' - 'rpcNd4-5AteFprIr9hn9VasIIF8WdFdtC2e8xGMJW5Q0M3G3Iu-LLNmE' - 'oTIDqtbJ7OrIcGBIwsc3seCV3eCG6kOYwz5w-f8DeOpwcDX58yYPmapJ' - '6A'), - 'expires_in': 600, - 'id_token': ('eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJPSnhVQ0p0' - 'TmJQT0NOUGFNNmc3ZU1zY2pqTXhoem9vNGxZaFhsa1c2TWhBIn0.eyJqdGki' - 'OiI0NDRlYzU1My1iYzhiLTQ2YjYtOTlmYS0zOTc3YTJhZDY1ZmEiLCJleHAi' - 'OjE1ODI3MjM3MDEsIm5iZiI6MCwiaWF0IjoxNTgyNzIzMTAxLCJpc3MiOiJo' - 'dHRwOi8vbG9jYWxob3N0OjgwODAvYXV0aC9yZWFsbXMvU29mdHdhcmVIZXJp' - 'dGFnZSIsImF1ZCI6InN3aC13ZWIiLCJzdWIiOiJmZWFjZDM0NC1iNDY4LTRh' - 'NjUtYTIzNi0xNGY2MWU2YjcyMDAiLCJ0eXAiOiJJRCIsImF6cCI6InN3aC13' - 'ZWIiLCJhdXRoX3RpbWUiOjE1ODI3MjMxMDAsInNlc3Npb25fc3RhdGUiOiJk' - 'ODJiOTBkMS0wYTk0LTRlNzQtYWQ2Ni1kZDk1MzQxYzdiNmQiLCJhY3IiOiIx' - 'IiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJuYW1lIjoiSm9obiBEb2UiLCJn' - 'cm91cHMiOltdLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJqb2huZG9lIiwiZ2l2' - 'ZW5fbmFtZSI6IkpvaG4iLCJmYW1pbHlfbmFtZSI6IkRvZSIsImVtYWlsIjoi' - 'am9obi5kb2VAZXhhbXBsZS5jb20ifQ.YB7bxlz_wgLJSkylVjmqedxQgEMee' - 'JOdi9CFHXV4F3ZWsEZ52CGuJXsozkX2oXvgU06MzzLNEK8ojgrPSNzjRkutL' - 'aaLq_YUzv4iV8fmKUS_aEyiYZbfoBe3Y4dwv2FoPEPCt96iTwpzM5fg_oYw_' - 'PHCq-Yl5SulT1nTrJZpntkf0hRjmxlDO06JMp0aZ8xS8RYJqH48xCRf_DARE' - '0jJV2-UuzOWI6xBATwFfP44kV6wFmErLN5txMgwZzCSB2OCe5Cl1il0eTQTN' - 'ybeSYZeZE61QtuTRUHeP1D1qSbJGy5g_S67SdTkS-hQFvfrrD84qGflIEqnX' - 'ZbYnitD1Typ6Q'), - 'not-before-policy': 0, - 'refresh_expires_in': 1800, - 'refresh_token': ('eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJmNjM' - 'zMDE5MS01YTU4LTQxMDAtOGIzYS00ZDdlM2U1NjA3MTgifQ.eyJqdGk' - 'iOiIxYWI5ZWZmMS0xZWZlLTQ3MDMtOGQ2YS03Nzg1NWUwYzQyYTYiLC' - 'JleHAiOjE1ODI3MjQ5MDEsIm5iZiI6MCwiaWF0IjoxNTgyNzIzMTAxL' - 'CJpc3MiOiJodHRwOi8vbG9jYWxob3N0OjgwODAvYXV0aC9yZWFsbXMv' - 
'U29mdHdhcmVIZXJpdGFnZSIsImF1ZCI6Imh0dHA6Ly9sb2NhbGhvc3Q' - '6ODA4MC9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwic3ViIj' - 'oiZmVhY2QzNDQtYjQ2OC00YTY1LWEyMzYtMTRmNjFlNmI3MjAwIiwid' - 'HlwIjoiUmVmcmVzaCIsImF6cCI6InN3aC13ZWIiLCJhdXRoX3RpbWUi' - 'OjAsInNlc3Npb25fc3RhdGUiOiJkODJiOTBkMS0wYTk0LTRlNzQtYWQ' - '2Ni1kZDk1MzQxYzdiNmQiLCJyZWFsbV9hY2Nlc3MiOnsicm9sZXMiOl' - 'sib2ZmbGluZV9hY2Nlc3MiLCJ1bWFfYXV0aG9yaXphdGlvbiJdfSwic' - 'mVzb3VyY2VfYWNjZXNzIjp7ImFjY291bnQiOnsicm9sZXMiOlsibWFu' - 'YWdlLWFjY291bnQiLCJtYW5hZ2UtYWNjb3VudC1saW5rcyIsInZpZXc' - 'tcHJvZmlsZSJdfX0sInNjb3BlIjoib3BlbmlkIGVtYWlsIHByb2ZpbG' - 'UifQ.xQYrl2CMP_GQ_TFqhsTz-rTs3WuZz5I37toi1eSsDMI'), - 'scope': 'openid email profile', - 'session_state': 'd82b90d1-0a94-4e74-ad66-dd95341c7b6d', - 'token_type': 'bearer' + "access_token": ( + "eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJPSnhV" + "Q0p0TmJQT0NOUGFNNmc3ZU1zY2pqTXhoem9vNGxZaFhsa1c2TWhBIn0." + "eyJqdGkiOiIzMWZjNTBiNy1iYmU1LTRmNTEtOTFlZi04ZTNlZWM1MTMz" + "MWUiLCJleHAiOjE1ODI3MjM3MDEsIm5iZiI6MCwiaWF0IjoxNTgyNzIz" + "MTAxLCJpc3MiOiJodHRwOi8vbG9jYWxob3N0OjgwODAvYXV0aC9yZWFs" + "bXMvU29mdHdhcmVIZXJpdGFnZSIsImF1ZCI6WyJzd2gtd2ViIiwiYWNj" + "b3VudCJdLCJzdWIiOiJmZWFjZDM0NC1iNDY4LTRhNjUtYTIzNi0xNGY2" + "MWU2YjcyMDAiLCJ0eXAiOiJCZWFyZXIiLCJhenAiOiJzd2gtd2ViIiwi" + "YXV0aF90aW1lIjoxNTgyNzIzMTAwLCJzZXNzaW9uX3N0YXRlIjoiZDgy" + "YjkwZDEtMGE5NC00ZTc0LWFkNjYtZGQ5NTM0MWM3YjZkIiwiYWNyIjoi" + "MSIsImFsbG93ZWQtb3JpZ2lucyI6WyIqIl0sInJlYWxtX2FjY2VzcyI6" + "eyJyb2xlcyI6WyJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0" + "aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xl" + "cyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtz" + "Iiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgZW1haWwg" + "cHJvZmlsZSIsImVtYWlsX3ZlcmlmaWVkIjpmYWxzZSwibmFtZSI6Ikpv" + "aG4gRG9lIiwiZ3JvdXBzIjpbXSwicHJlZmVycmVkX3VzZXJuYW1lIjoi" + "am9obmRvZSIsImdpdmVuX25hbWUiOiJKb2huIiwiZmFtaWx5X25hbWUi" + "OiJEb2UiLCJlbWFpbCI6ImpvaG4uZG9lQGV4YW1wbGUuY29tIn0.neJ-" + "Pmd87J6Gt0fzDqmXFeoy34Iqb5vNNEEgIKqtqg3moaVkbXrO_9R37DJB" + "AgdFv0owVONK3GbqPOEICePgG6RFtri999DetNE-O5sB4fwmHPWcHPlO" + "kcPLbVJqu6zWo-2AzlfAy5bCNvj_wzs2tjFjLeHcRgR1a1WY3uTp5EWc" + "HITCWQZzZWFGZTZCTlGkpdyJTqxGBdSHRB4NlIVGpYSTBsBsxttFEetl" + "rpcNd4-5AteFprIr9hn9VasIIF8WdFdtC2e8xGMJW5Q0M3G3Iu-LLNmE" + "oTIDqtbJ7OrIcGBIwsc3seCV3eCG6kOYwz5w-f8DeOpwcDX58yYPmapJ" + "6A" + ), + "expires_in": 600, + "id_token": ( + "eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJPSnhVQ0p0" + "TmJQT0NOUGFNNmc3ZU1zY2pqTXhoem9vNGxZaFhsa1c2TWhBIn0.eyJqdGki" + "OiI0NDRlYzU1My1iYzhiLTQ2YjYtOTlmYS0zOTc3YTJhZDY1ZmEiLCJleHAi" + "OjE1ODI3MjM3MDEsIm5iZiI6MCwiaWF0IjoxNTgyNzIzMTAxLCJpc3MiOiJo" + "dHRwOi8vbG9jYWxob3N0OjgwODAvYXV0aC9yZWFsbXMvU29mdHdhcmVIZXJp" + "dGFnZSIsImF1ZCI6InN3aC13ZWIiLCJzdWIiOiJmZWFjZDM0NC1iNDY4LTRh" + "NjUtYTIzNi0xNGY2MWU2YjcyMDAiLCJ0eXAiOiJJRCIsImF6cCI6InN3aC13" + "ZWIiLCJhdXRoX3RpbWUiOjE1ODI3MjMxMDAsInNlc3Npb25fc3RhdGUiOiJk" + "ODJiOTBkMS0wYTk0LTRlNzQtYWQ2Ni1kZDk1MzQxYzdiNmQiLCJhY3IiOiIx" + "IiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJuYW1lIjoiSm9obiBEb2UiLCJn" + "cm91cHMiOltdLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJqb2huZG9lIiwiZ2l2" + "ZW5fbmFtZSI6IkpvaG4iLCJmYW1pbHlfbmFtZSI6IkRvZSIsImVtYWlsIjoi" + "am9obi5kb2VAZXhhbXBsZS5jb20ifQ.YB7bxlz_wgLJSkylVjmqedxQgEMee" + "JOdi9CFHXV4F3ZWsEZ52CGuJXsozkX2oXvgU06MzzLNEK8ojgrPSNzjRkutL" + "aaLq_YUzv4iV8fmKUS_aEyiYZbfoBe3Y4dwv2FoPEPCt96iTwpzM5fg_oYw_" + "PHCq-Yl5SulT1nTrJZpntkf0hRjmxlDO06JMp0aZ8xS8RYJqH48xCRf_DARE" + "0jJV2-UuzOWI6xBATwFfP44kV6wFmErLN5txMgwZzCSB2OCe5Cl1il0eTQTN" + "ybeSYZeZE61QtuTRUHeP1D1qSbJGy5g_S67SdTkS-hQFvfrrD84qGflIEqnX" + "ZbYnitD1Typ6Q" 
+ ), + "not-before-policy": 0, + "refresh_expires_in": 1800, + "refresh_token": ( + "eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJmNjM" + "zMDE5MS01YTU4LTQxMDAtOGIzYS00ZDdlM2U1NjA3MTgifQ.eyJqdGk" + "iOiIxYWI5ZWZmMS0xZWZlLTQ3MDMtOGQ2YS03Nzg1NWUwYzQyYTYiLC" + "JleHAiOjE1ODI3MjQ5MDEsIm5iZiI6MCwiaWF0IjoxNTgyNzIzMTAxL" + "CJpc3MiOiJodHRwOi8vbG9jYWxob3N0OjgwODAvYXV0aC9yZWFsbXMv" + "U29mdHdhcmVIZXJpdGFnZSIsImF1ZCI6Imh0dHA6Ly9sb2NhbGhvc3Q" + "6ODA4MC9hdXRoL3JlYWxtcy9Tb2Z0d2FyZUhlcml0YWdlIiwic3ViIj" + "oiZmVhY2QzNDQtYjQ2OC00YTY1LWEyMzYtMTRmNjFlNmI3MjAwIiwid" + "HlwIjoiUmVmcmVzaCIsImF6cCI6InN3aC13ZWIiLCJhdXRoX3RpbWUi" + "OjAsInNlc3Npb25fc3RhdGUiOiJkODJiOTBkMS0wYTk0LTRlNzQtYWQ" + "2Ni1kZDk1MzQxYzdiNmQiLCJyZWFsbV9hY2Nlc3MiOnsicm9sZXMiOl" + "sib2ZmbGluZV9hY2Nlc3MiLCJ1bWFfYXV0aG9yaXphdGlvbiJdfSwic" + "mVzb3VyY2VfYWNjZXNzIjp7ImFjY291bnQiOnsicm9sZXMiOlsibWFu" + "YWdlLWFjY291bnQiLCJtYW5hZ2UtYWNjb3VudC1saW5rcyIsInZpZXc" + "tcHJvZmlsZSJdfX0sInNjb3BlIjoib3BlbmlkIGVtYWlsIHByb2ZpbG" + "UifQ.xQYrl2CMP_GQ_TFqhsTz-rTs3WuZz5I37toi1eSsDMI" + ), + "scope": "openid email profile", + "session_state": "d82b90d1-0a94-4e74-ad66-dd95341c7b6d", + "token_type": "bearer", } userinfo = { - 'email': 'john.doe@example.com', - 'email_verified': False, - 'family_name': 'Doe', - 'given_name': 'John', - 'groups': ['/staff'], - 'name': 'John Doe', - 'preferred_username': 'johndoe', - 'sub': 'feacd344-b468-4a65-a236-14f61e6b7200' + "email": "john.doe@example.com", + "email_verified": False, + "family_name": "Doe", + "given_name": "John", + "groups": ["/staff"], + "name": "John Doe", + "preferred_username": "johndoe", + "sub": "feacd344-b468-4a65-a236-14f61e6b7200", } diff --git a/swh/web/tests/auth/test_api_auth.py b/swh/web/tests/auth/test_api_auth.py --- a/swh/web/tests/auth/test_api_auth.py +++ b/swh/web/tests/auth/test_api_auth.py @@ -20,10 +20,10 @@ Check user gets authenticated when querying the web api through a web browser. """ - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") mock_keycloak(mocker) - client.login(code='', code_verifier='', redirect_uri='') + client.login(code="", code_verifier="", redirect_uri="") response = client.get(url) request = response.wsgi_request @@ -44,13 +44,12 @@ Check user gets authenticated when querying the web api through an HTTP client using bearer token authentication. 
""" - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") - access_token = sample_data.oidc_profile['access_token'] + access_token = sample_data.oidc_profile["access_token"] mock_keycloak(mocker) - api_client.credentials( - HTTP_AUTHORIZATION=f"Bearer {access_token}") + api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {access_token}") response = api_client.get(url) request = response.wsgi_request @@ -67,14 +66,13 @@ @pytest.mark.django_db def test_drf_oidc_bearer_token_auth_failure(mocker, api_client): - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") - access_token = sample_data.oidc_profile['access_token'] + access_token = sample_data.oidc_profile["access_token"] # check for failed authentication but with expected token format mock_keycloak(mocker, auth_success=False) - api_client.credentials( - HTTP_AUTHORIZATION=f"Bearer {access_token}") + api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {access_token}") response = api_client.get(url) request = response.wsgi_request @@ -84,8 +82,7 @@ # check for failed authentication when token format is invalid mock_keycloak(mocker) - api_client.credentials( - HTTP_AUTHORIZATION=f"Bearer invalid-token-format") + api_client.credentials(HTTP_AUTHORIZATION=f"Bearer invalid-token-format") response = api_client.get(url) request = response.wsgi_request @@ -95,13 +92,12 @@ def test_drf_oidc_auth_invalid_or_missing_authorization_type(api_client): - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") - access_token = sample_data.oidc_profile['access_token'] + access_token = sample_data.oidc_profile["access_token"] # missing authorization type - api_client.credentials( - HTTP_AUTHORIZATION=f"{access_token}") + api_client.credentials(HTTP_AUTHORIZATION=f"{access_token}") response = api_client.get(url) request = response.wsgi_request @@ -110,8 +106,7 @@ assert isinstance(request.user, AnonymousUser) # invalid authorization type - api_client.credentials( - HTTP_AUTHORIZATION=f"Foo token") + api_client.credentials(HTTP_AUTHORIZATION=f"Foo token") response = api_client.get(url) request = response.wsgi_request diff --git a/swh/web/tests/auth/test_backends.py b/swh/web/tests/auth/test_backends.py --- a/swh/web/tests/auth/test_backends.py +++ b/swh/web/tests/auth/test_backends.py @@ -22,25 +22,27 @@ def _authenticate_user(request_factory): - request = request_factory.get(reverse('oidc-login-complete')) + request = request_factory.get(reverse("oidc-login-complete")) - return authenticate(request=request, - code='some-code', - code_verifier='some-code-verifier', - redirect_uri='https://localhost:5004') + return authenticate( + request=request, + code="some-code", + code_verifier="some-code-verifier", + redirect_uri="https://localhost:5004", + ) def _check_authenticated_user(user, decoded_token): assert user is not None assert isinstance(user, OIDCUser) assert user.id != 0 - assert user.username == decoded_token['preferred_username'] - assert user.password == '' - assert user.first_name == decoded_token['given_name'] - assert user.last_name == decoded_token['family_name'] - assert user.email == decoded_token['email'] - assert user.is_staff == ('/staff' in decoded_token['groups']) - assert user.sub == decoded_token['sub'] + assert user.username == decoded_token["preferred_username"] + assert user.password == "" + assert user.first_name == decoded_token["given_name"] + assert user.last_name == decoded_token["family_name"] + assert user.email == decoded_token["email"] + assert user.is_staff == 
("/staff" in decoded_token["groups"]) + assert user.sub == decoded_token["sub"] @pytest.mark.django_db @@ -52,20 +54,21 @@ decoded_token = kc_oidc_mock.decode_token(user.access_token) _check_authenticated_user(user, decoded_token) - auth_datetime = datetime.fromtimestamp(decoded_token['auth_time']) - exp_datetime = datetime.fromtimestamp(decoded_token['exp']) - refresh_exp_datetime = ( - auth_datetime + timedelta(seconds=oidc_profile['refresh_expires_in'])) + auth_datetime = datetime.fromtimestamp(decoded_token["auth_time"]) + exp_datetime = datetime.fromtimestamp(decoded_token["exp"]) + refresh_exp_datetime = auth_datetime + timedelta( + seconds=oidc_profile["refresh_expires_in"] + ) - assert user.access_token == oidc_profile['access_token'] + assert user.access_token == oidc_profile["access_token"] assert user.expires_at == exp_datetime - assert user.id_token == oidc_profile['id_token'] - assert user.refresh_token == oidc_profile['refresh_token'] + assert user.id_token == oidc_profile["id_token"] + assert user.refresh_token == oidc_profile["refresh_token"] assert user.refresh_expires_at == refresh_exp_datetime - assert user.scope == oidc_profile['scope'] - assert user.session_state == oidc_profile['session_state'] + assert user.scope == oidc_profile["scope"] + assert user.session_state == oidc_profile["session_state"] - backend_path = 'swh.web.auth.backends.OIDCAuthorizationCodePKCEBackend' + backend_path = "swh.web.auth.backends.OIDCAuthorizationCodePKCEBackend" assert user.backend == backend_path backend_idx = settings.AUTHENTICATION_BACKENDS.index(backend_path) assert get_backends()[backend_idx].get_user(user.id) == user @@ -81,39 +84,35 @@ @pytest.mark.django_db -def test_drf_oidc_bearer_token_auth_backend_success(mocker, - api_request_factory): - url = reverse('api-1-stat-counters') +def test_drf_oidc_bearer_token_auth_backend_success(mocker, api_request_factory): + url = reverse("api-1-stat-counters") drf_auth_backend = OIDCBearerTokenAuthentication() kc_oidc_mock = mock_keycloak(mocker) - access_token = sample_data.oidc_profile['access_token'] + access_token = sample_data.oidc_profile["access_token"] decoded_token = kc_oidc_mock.decode_token(access_token) - request = api_request_factory.get( - url, HTTP_AUTHORIZATION=f"Bearer {access_token}") + request = api_request_factory.get(url, HTTP_AUTHORIZATION=f"Bearer {access_token}") user, _ = drf_auth_backend.authenticate(request) _check_authenticated_user(user, decoded_token) # oidc_profile is not filled when authenticating through bearer token - assert hasattr(user, 'access_token') and user.access_token is None + assert hasattr(user, "access_token") and user.access_token is None @pytest.mark.django_db -def test_drf_oidc_bearer_token_auth_backend_failure(mocker, - api_request_factory): +def test_drf_oidc_bearer_token_auth_backend_failure(mocker, api_request_factory): - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") drf_auth_backend = OIDCBearerTokenAuthentication() # simulate a failed authentication with a bearer token in expected format mock_keycloak(mocker, auth_success=False) - access_token = sample_data.oidc_profile['access_token'] + access_token = sample_data.oidc_profile["access_token"] - request = api_request_factory.get( - url, HTTP_AUTHORIZATION=f"Bearer {access_token}") + request = api_request_factory.get(url, HTTP_AUTHORIZATION=f"Bearer {access_token}") with pytest.raises(AuthenticationFailed): drf_auth_backend.authenticate(request) @@ -122,7 +121,8 @@ mock_keycloak(mocker) request = 
api_request_factory.get( - url, HTTP_AUTHORIZATION=f"Bearer invalid-token-format") + url, HTTP_AUTHORIZATION=f"Bearer invalid-token-format" + ) with pytest.raises(AuthenticationFailed): drf_auth_backend.authenticate(request) @@ -130,21 +130,19 @@ def test_drf_oidc_auth_invalid_or_missing_auth_type(api_request_factory): - url = reverse('api-1-stat-counters') + url = reverse("api-1-stat-counters") drf_auth_backend = OIDCBearerTokenAuthentication() - access_token = sample_data.oidc_profile['access_token'] + access_token = sample_data.oidc_profile["access_token"] # Invalid authorization type - request = api_request_factory.get( - url, HTTP_AUTHORIZATION=f"Foo token") + request = api_request_factory.get(url, HTTP_AUTHORIZATION=f"Foo token") with pytest.raises(AuthenticationFailed): drf_auth_backend.authenticate(request) # Missing authorization type - request = api_request_factory.get( - url, HTTP_AUTHORIZATION=f"{access_token}") + request = api_request_factory.get(url, HTTP_AUTHORIZATION=f"{access_token}") with pytest.raises(AuthenticationFailed): drf_auth_backend.authenticate(request) diff --git a/swh/web/tests/auth/test_middlewares.py b/swh/web/tests/auth/test_middlewares.py --- a/swh/web/tests/auth/test_middlewares.py +++ b/swh/web/tests/auth/test_middlewares.py @@ -14,16 +14,16 @@ @pytest.mark.django_db -@modify_settings(MIDDLEWARE={ - 'remove': ['swh.web.auth.middlewares.OIDCSessionRefreshMiddleware'] -}) +@modify_settings( + MIDDLEWARE={"remove": ["swh.web.auth.middlewares.OIDCSessionRefreshMiddleware"]} +) def test_oidc_session_refresh_middleware_disabled(client, mocker): # authenticate but make session expires immediately kc_oidc_mock = mock_keycloak(mocker, exp=int(datetime.now().timestamp())) - client.login(code='', code_verifier='', redirect_uri='') + client.login(code="", code_verifier="", redirect_uri="") kc_oidc_mock.authorization_code.assert_called() - url = reverse('swh-web-homepage') + url = reverse("swh-web-homepage") resp = client.get(url) # no redirection for silent refresh assert resp.status_code != 302 @@ -33,14 +33,15 @@ def test_oidc_session_refresh_middleware_enabled(client, mocker): # authenticate but make session expires immediately kc_oidc_mock = mock_keycloak(mocker, exp=int(datetime.now().timestamp())) - client.login(code='', code_verifier='', redirect_uri='') + client.login(code="", code_verifier="", redirect_uri="") kc_oidc_mock.authorization_code.assert_called() - url = reverse('swh-web-homepage') + url = reverse("swh-web-homepage") resp = client.get(url) # should redirect for silent session refresh assert resp.status_code == 302 - silent_refresh_url = reverse('oidc-login', query_params={'next_path': url, - 'prompt': 'none'}) - assert resp['location'] == silent_refresh_url + silent_refresh_url = reverse( + "oidc-login", query_params={"next_path": url, "prompt": "none"} + ) + assert resp["location"] == silent_refresh_url diff --git a/swh/web/tests/auth/test_utils.py b/swh/web/tests/auth/test_utils.py --- a/swh/web/tests/auth/test_utils.py +++ b/swh/web/tests/auth/test_utils.py @@ -19,7 +19,7 @@ code_verifier, code_challenge = gen_oidc_pkce_codes() # check the code verifier only contains allowed characters - assert re.match(r'[a-zA-Z0-9-\._~]+', code_verifier) + assert re.match(r"[a-zA-Z0-9-\._~]+", code_verifier) # check minimum and maximum authorized length for the # code verifier @@ -27,11 +27,11 @@ assert len(code_verifier) <= 128 # compute code challenge from code verifier - challenge = hashlib.sha256(code_verifier.encode('ascii')).digest() - challenge = 
urlsafe_b64encode(challenge).decode('ascii') - challenge = challenge.replace('=', '') + challenge = hashlib.sha256(code_verifier.encode("ascii")).digest() + challenge = urlsafe_b64encode(challenge).decode("ascii") + challenge = challenge.replace("=", "") # check base64 padding is not present - assert not code_challenge[-1].endswith('=') + assert not code_challenge[-1].endswith("=") # check code challenge is valid assert code_challenge == challenge diff --git a/swh/web/tests/auth/test_views.py b/swh/web/tests/auth/test_views.py --- a/swh/web/tests/auth/test_views.py +++ b/swh/web/tests/auth/test_views.py @@ -30,7 +30,7 @@ kc_oidc_mock = mock_keycloak(mocker) # user initiates login process - login_url = reverse('oidc-login') + login_url = reverse("oidc-login") response = client.get(login_url) request = response.wsgi_request @@ -39,34 +39,33 @@ assert response.status_code == 302 assert isinstance(request.user, AnonymousUser) - parsed_url = urlparse(response['location']) + parsed_url = urlparse(response["location"]) - authorization_url = kc_oidc_mock.well_known()['authorization_endpoint'] + authorization_url = kc_oidc_mock.well_known()["authorization_endpoint"] query_dict = QueryDict(parsed_url.query) # check redirect url is valid - assert urljoin(response['location'], parsed_url.path) == authorization_url - assert 'client_id' in query_dict - assert query_dict['client_id'] == OIDC_SWH_WEB_CLIENT_ID - assert 'response_type' in query_dict - assert query_dict['response_type'] == 'code' - assert 'redirect_uri' in query_dict - assert query_dict['redirect_uri'] == reverse('oidc-login-complete', - request=request) - assert 'code_challenge_method' in query_dict - assert query_dict['code_challenge_method'] == 'S256' - assert 'scope' in query_dict - assert query_dict['scope'] == 'openid' - assert 'state' in query_dict - assert 'code_challenge' in query_dict + assert urljoin(response["location"], parsed_url.path) == authorization_url + assert "client_id" in query_dict + assert query_dict["client_id"] == OIDC_SWH_WEB_CLIENT_ID + assert "response_type" in query_dict + assert query_dict["response_type"] == "code" + assert "redirect_uri" in query_dict + assert query_dict["redirect_uri"] == reverse("oidc-login-complete", request=request) + assert "code_challenge_method" in query_dict + assert query_dict["code_challenge_method"] == "S256" + assert "scope" in query_dict + assert query_dict["scope"] == "openid" + assert "state" in query_dict + assert "code_challenge" in query_dict # check a login_data has been registered in user session - assert 'login_data' in request.session - login_data = request.session['login_data'] - assert 'code_verifier' in login_data - assert 'state' in login_data - assert 'redirect_uri' in login_data - assert login_data['redirect_uri'] == query_dict['redirect_uri'] + assert "login_data" in request.session + login_data = request.session["login_data"] + assert "code_verifier" in login_data + assert "state" in login_data + assert "redirect_uri" in login_data + assert login_data["redirect_uri"] == query_dict["redirect_uri"] # once a user has identified himself in Keycloak, he is # redirected to the 'oidc-login-complete' view to @@ -74,13 +73,17 @@ # generate authorization code / session state in the same # manner as Keycloak - code = f'{str(uuid.uuid4())}.{str(uuid.uuid4())}.{str(uuid.uuid4())}' + code = f"{str(uuid.uuid4())}.{str(uuid.uuid4())}.{str(uuid.uuid4())}" session_state = str(uuid.uuid4()) - login_complete_url = reverse('oidc-login-complete', - query_params={'code': code, - 
'state': login_data['state'], - 'session_state': session_state}) + login_complete_url = reverse( + "oidc-login-complete", + query_params={ + "code": code, + "state": login_data["state"], + "session_state": session_state, + }, + ) # login process finalization response = client.get(login_complete_url) @@ -88,7 +91,7 @@ # should redirect to root url by default assert response.status_code == 302 - assert response['location'] == request.build_absolute_uri('/') + assert response["location"] == request.build_absolute_uri("/") # user should be authenticated assert isinstance(request.user, OIDCUser) @@ -106,22 +109,21 @@ # mock Keycloak client kc_oidc_mock = mock_keycloak(mocker) # login our test user - client.login(code='', code_verifier='', redirect_uri='') + client.login(code="", code_verifier="", redirect_uri="") kc_oidc_mock.authorization_code.assert_called() # user initiates logout - oidc_logout_url = reverse('oidc-logout') + oidc_logout_url = reverse("oidc-logout") response = client.get(oidc_logout_url) request = response.wsgi_request # should redirect to logout page assert response.status_code == 302 - logout_url = reverse('logout', query_params={'remote_user': 1}) - assert response['location'] == request.build_absolute_uri(logout_url) + logout_url = reverse("logout", query_params={"remote_user": 1}) + assert response["location"] == request.build_absolute_uri(logout_url) # should have been logged out in Keycloak - kc_oidc_mock.logout.assert_called_with( - sample_data.oidc_profile['refresh_token']) + kc_oidc_mock.logout.assert_called_with(sample_data.oidc_profile["refresh_token"]) # check effective logout in Django assert isinstance(request.user, AnonymousUser) @@ -136,7 +138,7 @@ mock_keycloak(mocker, auth_success=False) # user initiates login process - login_url = reverse('oidc-login') + login_url = reverse("oidc-login") response = client.get(login_url) request = response.wsgi_request @@ -150,38 +152,41 @@ # Simulate possible errors with OpenID Connect in the login complete view. 
+ def test_oidc_login_complete_view_no_login_data(client, mocker): # user initiates login process - login_url = reverse('oidc-login-complete') + login_url = reverse("oidc-login-complete") response = client.get(login_url) # should render an error page assert_template_used(response, "error.html") - assert_contains(response, 'Login process has not been initialized.', - status_code=500) + assert_contains( + response, "Login process has not been initialized.", status_code=500 + ) def test_oidc_login_complete_view_missing_parameters(client, mocker): # simulate login process has been initialized session = client.session - session['login_data'] = { - 'code_verifier': '', - 'state': str(uuid.uuid4()), - 'redirect_uri': '', - 'next_path': '', - 'prompt': '', + session["login_data"] = { + "code_verifier": "", + "state": str(uuid.uuid4()), + "redirect_uri": "", + "next_path": "", + "prompt": "", } session.save() # user initiates login process - login_url = reverse('oidc-login-complete') + login_url = reverse("oidc-login-complete") response = client.get(login_url) request = response.wsgi_request # should render an error page assert_template_used(response, "error.html") - assert_contains(response, 'Missing query parameters for authentication.', - status_code=400) + assert_contains( + response, "Missing query parameters for authentication.", status_code=400 + ) # no user should be logged in assert isinstance(request.user, AnonymousUser) @@ -193,27 +198,28 @@ # simulate login process has been initialized session = client.session - session['login_data'] = { - 'code_verifier': '', - 'state': str(uuid.uuid4()), - 'redirect_uri': '', - 'next_path': '', - 'prompt': '', + session["login_data"] = { + "code_verifier": "", + "state": str(uuid.uuid4()), + "redirect_uri": "", + "next_path": "", + "prompt": "", } session.save() # user initiates login process - login_url = reverse('oidc-login-complete', - query_params={'code': 'some-code', - 'state': 'some-state'}) + login_url = reverse( + "oidc-login-complete", query_params={"code": "some-code", "state": "some-state"} + ) response = client.get(login_url) request = response.wsgi_request # should render an error page assert_template_used(response, "error.html") - assert_contains(response, 'Wrong CSRF token, aborting login process.', - status_code=400) + assert_contains( + response, "Wrong CSRF token, aborting login process.", status_code=400 + ) # no user should be logged in assert isinstance(request.user, AnonymousUser) @@ -226,27 +232,27 @@ # simulate login process has been initialized session = client.session - session['login_data'] = { - 'code_verifier': '', - 'state': str(uuid.uuid4()), - 'redirect_uri': '', - 'next_path': '', - 'prompt': '', + session["login_data"] = { + "code_verifier": "", + "state": str(uuid.uuid4()), + "redirect_uri": "", + "next_path": "", + "prompt": "", } session.save() # check authentication error is reported - login_url = reverse('oidc-login-complete', - query_params={'code': 'some-code', - 'state': session['login_data']['state']}) + login_url = reverse( + "oidc-login-complete", + query_params={"code": "some-code", "state": session["login_data"]["state"]}, + ) response = client.get(login_url) request = response.wsgi_request # should render an error page assert_template_used(response, "error.html") - assert_contains(response, 'User authentication failed.', - status_code=500) + assert_contains(response, "User authentication failed.", status_code=500) # no user should be logged in assert isinstance(request.user, AnonymousUser) @@ -260,13 
+266,13 @@ # mock Keycloak client kc_oidc_mock = mock_keycloak(mocker) # login our test user - client.login(code='', code_verifier='', redirect_uri='') + client.login(code="", code_verifier="", redirect_uri="") - err_msg = 'Authentication server error' + err_msg = "Authentication server error" kc_oidc_mock.logout.side_effect = Exception(err_msg) # user initiates logout process - logout_url = reverse('oidc-logout') + logout_url = reverse("oidc-logout") response = client.get(logout_url) request = response.wsgi_request @@ -283,27 +289,32 @@ # mock Keycloak client mock_keycloak(mocker) - next_path = reverse('swh-web-homepage') + next_path = reverse("swh-web-homepage") # silent session refresh initialization - login_url = reverse('oidc-login', query_params={'next_path': next_path, - 'prompt': 'none'}) + login_url = reverse( + "oidc-login", query_params={"next_path": next_path, "prompt": "none"} + ) response = client.get(login_url) request = response.wsgi_request - login_data = request.session['login_data'] + login_data = request.session["login_data"] # check prompt value has been registered in user session - assert 'prompt' in login_data - assert login_data['prompt'] == 'none' + assert "prompt" in login_data + assert login_data["prompt"] == "none" # simulate a failed silent session refresh session_state = str(uuid.uuid4()) - login_complete_url = reverse('oidc-login-complete', - query_params={'error': 'login_required', - 'state': login_data['state'], - 'session_state': session_state}) + login_complete_url = reverse( + "oidc-login-complete", + query_params={ + "error": "login_required", + "state": login_data["state"], + "session_state": session_state, + }, + ) # login process finalization response = client.get(login_complete_url) @@ -311,6 +322,7 @@ # should redirect to logout page assert response.status_code == 302 - logout_url = reverse('logout', query_params={'next_path': next_path, - 'remote_user': 1}) - assert response['location'] == logout_url + logout_url = reverse( + "logout", query_params={"next_path": next_path, "remote_user": 1} + ) + assert response["location"] == logout_url diff --git a/swh/web/tests/browse/test_utils.py b/swh/web/tests/browse/test_utils.py --- a/swh/web/tests/browse/test_utils.py +++ b/swh/web/tests/browse/test_utils.py @@ -11,118 +11,121 @@ def test_get_mimetype_and_encoding_for_content(): - text = b'Hello world!' - assert (utils.get_mimetype_and_encoding_for_content(text) == - ('text/plain', 'us-ascii')) + text = b"Hello world!" 
+ assert utils.get_mimetype_and_encoding_for_content(text) == ( + "text/plain", + "us-ascii", + ) @given(origin_with_multiple_visits()) def test_get_origin_visit_snapshot_simple(archive_data, origin): - visits = archive_data.origin_visit_get(origin['url']) + visits = archive_data.origin_visit_get(origin["url"]) for visit in visits: - snapshot = archive_data.snapshot_get(visit['snapshot']) + snapshot = archive_data.snapshot_get(visit["snapshot"]) branches = [] releases = [] def _process_branch_data(branch, branch_data): - if branch_data['target_type'] == 'revision': - rev_data = archive_data.revision_get(branch_data['target']) - branches.append({ - 'name': branch, - 'revision': branch_data['target'], - 'directory': rev_data['directory'], - 'date': format_utc_iso_date(rev_data['date']), - 'message': rev_data['message'] - }) - elif branch_data['target_type'] == 'release': - rel_data = archive_data.release_get(branch_data['target']) - rev_data = archive_data.revision_get(rel_data['target']) - releases.append({ - 'name': rel_data['name'], - 'branch_name': branch, - 'date': format_utc_iso_date(rel_data['date']), - 'id': rel_data['id'], - 'message': rel_data['message'], - 'target_type': rel_data['target_type'], - 'target': rel_data['target'], - 'directory': rev_data['directory'] - }) - - for branch in sorted(snapshot['branches'].keys()): - branch_data = snapshot['branches'][branch] - if branch_data['target_type'] == 'alias': - target_data = snapshot['branches'][branch_data['target']] + if branch_data["target_type"] == "revision": + rev_data = archive_data.revision_get(branch_data["target"]) + branches.append( + { + "name": branch, + "revision": branch_data["target"], + "directory": rev_data["directory"], + "date": format_utc_iso_date(rev_data["date"]), + "message": rev_data["message"], + } + ) + elif branch_data["target_type"] == "release": + rel_data = archive_data.release_get(branch_data["target"]) + rev_data = archive_data.revision_get(rel_data["target"]) + releases.append( + { + "name": rel_data["name"], + "branch_name": branch, + "date": format_utc_iso_date(rel_data["date"]), + "id": rel_data["id"], + "message": rel_data["message"], + "target_type": rel_data["target_type"], + "target": rel_data["target"], + "directory": rev_data["directory"], + } + ) + + for branch in sorted(snapshot["branches"].keys()): + branch_data = snapshot["branches"][branch] + if branch_data["target_type"] == "alias": + target_data = snapshot["branches"][branch_data["target"]] _process_branch_data(branch, target_data) else: _process_branch_data(branch, branch_data) - assert branches and releases, 'Incomplete test data.' + assert branches and releases, "Incomplete test data." 
         origin_visit_branches = utils.get_origin_visit_snapshot(
-            origin, visit_id=visit['visit'])
+            origin, visit_id=visit["visit"]
+        )

         assert origin_visit_branches == (branches, releases)


 def test_gen_link():
-    assert (utils.gen_link('https://www.softwareheritage.org/', 'swh') ==
-            '<a href="https://www.softwareheritage.org/">swh</a>')
+    assert (
+        utils.gen_link("https://www.softwareheritage.org/", "swh")
+        == '<a href="https://www.softwareheritage.org/">swh</a>'
+    )


 def test_gen_revision_link():
-    revision_id = '28a0bc4120d38a394499382ba21d6965a67a3703'
-    revision_url = reverse('browse-revision',
-                           url_args={'sha1_git': revision_id})
+    revision_id = "28a0bc4120d38a394499382ba21d6965a67a3703"
+    revision_url = reverse("browse-revision", url_args={"sha1_git": revision_id})

-    assert (utils.gen_revision_link(revision_id, link_text=None,
-                                    link_attrs=None) ==
-            '<a href="%s">%s</a>' % (revision_url, revision_id))
-    assert (utils.gen_revision_link(revision_id, shorten_id=True,
-                                    link_attrs=None) ==
-            '<a href="%s">%s</a>' % (revision_url, revision_id[:7]))
+    assert utils.gen_revision_link(
+        revision_id, link_text=None, link_attrs=None
+    ) == '<a href="%s">%s</a>' % (revision_url, revision_id)
+    assert utils.gen_revision_link(
+        revision_id, shorten_id=True, link_attrs=None
+    ) == '<a href="%s">%s</a>' % (revision_url, revision_id[:7])


 def test_gen_person_mail_link():
     person_full = {
-        'name': 'John Doe',
-        'email': 'john.doe@swh.org',
-        'fullname': 'John Doe <john.doe@swh.org>'
+        "name": "John Doe",
+        "email": "john.doe@swh.org",
+        "fullname": "John Doe <john.doe@swh.org>",
     }

-    assert (utils.gen_person_mail_link(person_full) ==
-            '<a href="mailto:%s">%s</a>' % (person_full['email'],
-                                            person_full['name']))
+    assert utils.gen_person_mail_link(person_full) == '<a href="mailto:%s">%s</a>' % (
+        person_full["email"],
+        person_full["name"],
+    )

-    link_text = 'Mail'
-    assert (utils.gen_person_mail_link(person_full, link_text=link_text) ==
-            '<a href="mailto:%s">%s</a>' % (person_full['email'],
-                                            link_text))
+    link_text = "Mail"
+    assert utils.gen_person_mail_link(
+        person_full, link_text=link_text
+    ) == '<a href="mailto:%s">%s</a>' % (person_full["email"], link_text)

-    person_partial_email = {
-        'name': None,
-        'email': None,
-        'fullname': 'john.doe@swh.org'
-    }
+    person_partial_email = {"name": None, "email": None, "fullname": "john.doe@swh.org"}

-    assert (utils.gen_person_mail_link(person_partial_email) ==
-            '<a href="mailto:%s">%s</a>' % (person_partial_email['fullname'],
-                                            person_partial_email['fullname']))
+    assert utils.gen_person_mail_link(
+        person_partial_email
+    ) == '<a href="mailto:%s">%s</a>' % (
+        person_partial_email["fullname"],
+        person_partial_email["fullname"],
+    )

     person_partial = {
-        'name': None,
-        'email': None,
-        'fullname': 'John Doe <john.doe@swh.org>'
+        "name": None,
+        "email": None,
+        "fullname": "John Doe <john.doe@swh.org>",
     }

-    assert (utils.gen_person_mail_link(person_partial) ==
-            person_partial['fullname'])
+    assert utils.gen_person_mail_link(person_partial) == person_partial["fullname"]

-    person_none = {
-        'name': None,
-        'email': None,
-        'fullname': None
-    }
+    person_none = {"name": None, "email": None, "fullname": None}

-    assert utils.gen_person_mail_link(person_none) == 'None'
+    assert utils.gen_person_mail_link(person_none) == "None"
diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py
--- a/swh/web/tests/browse/views/test_content.py
+++ b/swh/web/tests/browse/views/test_content.py
@@ -8,78 +8,82 @@
 from hypothesis import given

 from swh.web.browse.utils import (
-    get_mimetype_and_encoding_for_content, prepare_content_for_display,
-    _re_encode_content
+    get_mimetype_and_encoding_for_content,
+    prepare_content_for_display,
+    _re_encode_content,
 )
 from swh.web.common.exc import NotFoundExc
 from swh.web.common.identifiers import get_swh_persistent_id
 from swh.web.common.utils import gen_path_info, reverse
 from swh.web.tests.django_asserts import (
-    assert_contains, assert_not_contains, assert_template_used
+    assert_contains,
+    assert_not_contains,
+    assert_template_used,
 )
 from swh.web.tests.strategies import (
-    content, content_text_non_utf8, content_text_no_highlight,
-    content_image_type, content_text, invalid_sha1, unknown_content,
-    content_utf8_detected_as_binary
+    content,
+    content_text_non_utf8,
+    content_text_no_highlight,
+    content_image_type,
+    content_text,
+    invalid_sha1,
+    unknown_content,
+    content_utf8_detected_as_binary,
 )


 @given(content_text())
 def test_content_view_text(client, archive_data, content):
-    sha1_git = content['sha1_git']
+    sha1_git = content["sha1_git"]

-    url = reverse('browse-content',
-                  url_args={'query_string': content['sha1']},
-                  query_params={'path': content['path']})
+    url = reverse(
+        "browse-content",
+        url_args={"query_string": content["sha1"]},
+        query_params={"path": content["path"]},
+    )

-    url_raw = reverse('browse-content-raw',
-                      url_args={'query_string': content['sha1']})
+    url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]})

     resp = client.get(url)

     content_display = _process_content_for_display(archive_data, content)
-    mimetype = content_display['mimetype']
+    mimetype = content_display["mimetype"]

     assert resp.status_code == 200
-    assert_template_used(resp, 'browse/content.html')
+    assert_template_used(resp, "browse/content.html")

-    if mimetype.startswith('text/'):
-        assert_contains(resp, '<code class="%s">' %
-                        content_display['language'])
-        assert_contains(resp, escape(content_display['content_data']))
+    if mimetype.startswith("text/"):
+        assert_contains(resp, '<code class="%s">' % content_display["language"])
+        assert_contains(resp, escape(content_display["content_data"]))
     assert_contains(resp, url_raw)

-    swh_cnt_id = get_swh_persistent_id('content', sha1_git)
-    swh_cnt_id_url = reverse('browse-swh-id',
-                             url_args={'swh_id': swh_cnt_id})
+    swh_cnt_id = get_swh_persistent_id("content", sha1_git)
+    swh_cnt_id_url = reverse("browse-swh-id", url_args={"swh_id": swh_cnt_id})

     assert_contains(resp, swh_cnt_id)
     assert_contains(resp, swh_cnt_id_url)


 @given(content_text_no_highlight())
 def test_content_view_text_no_highlight(client, archive_data, content):
-    sha1_git = content['sha1_git']
+    sha1_git = content["sha1_git"]

-    url = reverse('browse-content',
-                  url_args={'query_string': content['sha1']})
+    url = reverse("browse-content", url_args={"query_string": content["sha1"]})

-    url_raw = reverse('browse-content-raw',
-                      url_args={'query_string': content['sha1']})
+    url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]})

     resp = client.get(url)

     content_display = _process_content_for_display(archive_data, content)

     assert resp.status_code == 200
-    assert_template_used(resp, 'browse/content.html')
+    assert_template_used(resp, "browse/content.html")

     assert_contains(resp, '<code class="nohighlight">')
-    assert_contains(resp, escape(content_display['content_data']))
+    assert_contains(resp, escape(content_display["content_data"]))
     assert_contains(resp, url_raw)

-    swh_cnt_id = get_swh_persistent_id('content', sha1_git)
-    swh_cnt_id_url = reverse('browse-swh-id',
-                             url_args={'swh_id': swh_cnt_id})
+    swh_cnt_id = get_swh_persistent_id("content", sha1_git)
+    swh_cnt_id_url = reverse("browse-swh-id", url_args={"swh_id": swh_cnt_id})

     assert_contains(resp, swh_cnt_id)
     assert_contains(resp, swh_cnt_id_url)
@@ -87,300 +91,298 @@

 @given(content_text_non_utf8())
 def test_content_view_no_utf8_text(client, archive_data, content):
-    sha1_git = content['sha1_git']
+    sha1_git = content["sha1_git"]

-    url = reverse('browse-content',
-                  url_args={'query_string': content['sha1']})
+    url = reverse("browse-content", url_args={"query_string": content["sha1"]})

     resp = client.get(url)

     content_display = _process_content_for_display(archive_data, content)

     assert resp.status_code == 200
-    assert_template_used(resp, 'browse/content.html')
-    swh_cnt_id = get_swh_persistent_id('content', sha1_git)
-    swh_cnt_id_url = reverse('browse-swh-id',
-                             url_args={'swh_id': swh_cnt_id})
+    assert_template_used(resp, "browse/content.html")
+    swh_cnt_id = get_swh_persistent_id("content", sha1_git)
+    swh_cnt_id_url = reverse("browse-swh-id", url_args={"swh_id": swh_cnt_id})
     assert_contains(resp, swh_cnt_id_url)
-    assert_contains(resp, escape(content_display['content_data']))
+    assert_contains(resp, escape(content_display["content_data"]))


 @given(content_image_type())
 def test_content_view_image(client, archive_data, content):
-    url = reverse('browse-content',
-                  url_args={'query_string': content['sha1']})
+    url = reverse("browse-content", url_args={"query_string": content["sha1"]})

-    url_raw = reverse('browse-content-raw',
-                      url_args={'query_string': content['sha1']})
+    url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]})

     resp = client.get(url)

     content_display = _process_content_for_display(archive_data, content)
-    mimetype = content_display['mimetype']
-    content_data = content_display['content_data']
+    mimetype = content_display["mimetype"]
+    content_data = content_display["content_data"]

     assert resp.status_code == 200
-    assert_template_used(resp, 'browse/content.html')
-    assert_contains(resp, '<img src="data:%s;base64,%s"/>'
-                    % (mimetype, content_data))
+    assert_template_used(resp, "browse/content.html")
+    assert_contains(resp, '<img src="data:%s;base64,%s"/>' % (mimetype, content_data))
     assert_contains(resp, url_raw)


 @given(content_text())
 def test_content_view_text_with_path(client, archive_data, content):
-    path = content['path']
+    path = content["path"]

-    url = reverse('browse-content',
-                  url_args={'query_string': content['sha1']},
-                  query_params={'path': path})
+    url = reverse(
+        "browse-content",
+        url_args={"query_string": content["sha1"]},
+        query_params={"path": path},
+    )

     resp = client.get(url)

     assert resp.status_code == 200
-    assert_template_used(resp, 'browse/content.html')
+    assert_template_used(resp, "browse/content.html")

     assert_contains(resp, '