diff --git a/swh/web/admin/origin_save.py b/swh/web/admin/origin_save.py index bde8a447..e72c220e 100644 --- a/swh/web/admin/origin_save.py +++ b/swh/web/admin/origin_save.py @@ -1,173 +1,173 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.conf import settings from django.contrib.admin.views.decorators import staff_member_required from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator from django.http import HttpResponse from django.shortcuts import render from django.views.decorators.http import require_POST from swh.web.admin.adminurls import admin_route from swh.web.common.models import ( SaveAuthorizedOrigin, SaveUnauthorizedOrigin, SaveOriginRequest ) from swh.web.common.origin_save import ( create_save_origin_request, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED ) @admin_route(r'origin/save/', view_name='admin-origin-save') @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save(request): return render(request, 'admin/origin-save.html') def _datatables_origin_urls_response(request, urls_query_set): search_value = request.GET['search[value]'] if search_value: urls_query_set = urls_query_set.filter(url__icontains=search_value) column_order = request.GET['order[0][column]'] field_order = request.GET['columns[%s][name]' % column_order] order_dir = request.GET['order[0][dir]'] if order_dir == 'desc': field_order = '-' + field_order urls_query_set = urls_query_set.order_by(field_order) table_data = {} table_data['draw'] = int(request.GET['draw']) table_data['recordsTotal'] = urls_query_set.count() table_data['recordsFiltered'] = urls_query_set.count() length = int(request.GET['length']) page = int(request.GET['start']) / length + 1 paginator = Paginator(urls_query_set, length) urls_query_set = paginator.page(page).object_list table_data['data'] = [{'url': u.url} for u in urls_query_set] table_data_json = json.dumps(table_data, separators=(',', ': ')) return HttpResponse(table_data_json, content_type='application/json') @admin_route(r'origin/save/authorized_urls/list/', view_name='admin-origin-save-authorized-urls-list') @staff_member_required def _admin_origin_save_authorized_urls_list(request): authorized_urls = SaveAuthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, authorized_urls) @admin_route(r'origin/save/authorized_urls/add/(?P.+)/', view_name='admin-origin-save-add-authorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_add_authorized_url(request, origin_url): try: SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: # add the new authorized url SaveAuthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = \ SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING) - # create origin save tasks for previoulsy pending requests + # create origin save tasks for previously pending requests for psr in pending_save_requests: create_save_origin_request(psr.origin_type, psr.origin_url) status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route(r'origin/save/authorized_urls/remove/(?P.+)/', view_name='admin-origin-save-remove-authorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_remove_authorized_url(request, origin_url): try: entry = SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route(r'origin/save/unauthorized_urls/list/', view_name='admin-origin-save-unauthorized-urls-list') @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_unauthorized_urls_list(request): unauthorized_urls = SaveUnauthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, unauthorized_urls) @admin_route(r'origin/save/unauthorized_urls/add/(?P.+)/', view_name='admin-origin-save-add-unauthorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_add_unauthorized_url(request, origin_url): try: SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = \ SaveOriginRequest.objects.filter(origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING) # mark pending requests as rejected for psr in pending_save_requests: psr.status = SAVE_REQUEST_REJECTED psr.save() status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route(r'origin/save/unauthorized_urls/remove/(?P.+)/', view_name='admin-origin-save-remove-unauthorized-url') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_remove_unauthorized_url(request, origin_url): try: entry = SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route(r'origin/save/request/accept/(?P.+)/url/(?P.+)/', # noqa view_name='admin-origin-save-request-accept') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_request_accept(request, origin_type, origin_url): SaveAuthorizedOrigin.objects.create(url=origin_url) create_save_origin_request(origin_type, origin_url) return HttpResponse(status=200) @admin_route(r'origin/save/request/reject/(?P.+)/url/(?P.+)/', # noqa view_name='admin-origin-save-request-reject') @require_POST @staff_member_required(login_url=settings.LOGIN_URL) def _admin_origin_save_request_reject(request, origin_type, origin_url): SaveUnauthorizedOrigin.objects.create(url=origin_url) sor = SaveOriginRequest.objects.get(origin_type=origin_type, origin_url=origin_url, status=SAVE_REQUEST_PENDING) sor.status = SAVE_REQUEST_REJECTED sor.save() return HttpResponse(status=200) diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py index 1d1fbc5e..d0968891 100644 --- a/swh/web/api/apidoc.py +++ b/swh/web/api/apidoc.py @@ -1,356 +1,356 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import docutils.nodes import docutils.parsers.rst import docutils.utils import functools import os import re from functools import wraps from rest_framework.decorators import api_view from swh.web.common.utils import parse_rst from swh.web.api.apiurls import APIUrls from swh.web.api.apiresponse import make_api_response, error_response class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor): """ docutils visitor for walking on a parsed rst document containing sphinx httpdomain roles. Its purpose is to extract relevant info regarding swh api endpoints (for instance url arguments) from their docstring written using sphinx httpdomain. """ # httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6) parameter_roles = ('param', 'parameter', 'arg', 'argument') response_json_object_roles = ('resjsonobj', 'resjson', '>jsonobj', '>json') response_json_array_roles = ('resjsonarr', '>jsonarr') query_parameter_roles = ('queryparameter', 'queryparam', 'qparam', 'query') request_header_roles = ('header', 'resheader', 'responseheader') status_code_roles = ('statuscode', 'status', 'code') def __init__(self, document, urls, data): super().__init__(document) self.urls = urls self.url_idx = 0 self.data = data self.args_set = set() self.params_set = set() self.returns_set = set() self.status_codes_set = set() self.reqheaders_set = set() self.resheaders_set = set() self.field_list_visited = False def process_paragraph(self, par): """ Process extracted paragraph text before display. Cleanup document model markups and transform the paragraph into a valid raw rst string (as the apidoc documentation transform rst to html when rendering). """ par = par.replace('\n', ' ') - # keep empahasized and strong text + # keep emphasized and strong text par = par.replace('', '*') par = par.replace('', '*') par = par.replace('', '**') par = par.replace('', '**') # remove parsed document markups par = re.sub('<[^<]+?>', '', par) # api urls cleanup to generate valid links afterwards par = re.sub('\(\w+\)', '', par) # noqa par = re.sub('\[.*\]', '', par) # noqa par = par.replace('//', '/') # transform references to api endpoints into valid rst links par = re.sub(':http:get:`(.*)`', r'`<\1>`_', par) # transform references to some elements into bold text par = re.sub(':http:header:`(.*)`', r'**\1**', par) par = re.sub(':func:`(.*)`', r'**\1**', par) return par def visit_field_list(self, node): """ Visit parsed rst field lists to extract relevant info regarding api endpoint. """ self.field_list_visited = True for child in node.traverse(): # get the parsed field name if isinstance(child, docutils.nodes.field_name): field_name = child.astext() # parse field text elif isinstance(child, docutils.nodes.paragraph): text = self.process_paragraph(str(child)) field_data = field_name.split(' ') # Parameters if field_data[0] in self.parameter_roles: if field_data[2] not in self.args_set: self.data['args'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.args_set.add(field_data[2]) # Query Parameters if field_data[0] in self.query_parameter_roles: if field_data[2] not in self.params_set: self.data['params'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.params_set.add(field_data[2]) # Response type if field_data[0] in self.response_json_array_roles or \ field_data[0] in self.response_json_object_roles: # array if field_data[0] in self.response_json_array_roles: self.data['return_type'] = 'array' # object else: self.data['return_type'] = 'object' # returned object field if field_data[2] not in self.returns_set: self.data['returns'].append({'name': field_data[2], 'type': field_data[1], 'doc': text}) self.returns_set.add(field_data[2]) # Status Codes if field_data[0] in self.status_code_roles: if field_data[1] not in self.status_codes_set: self.data['status_codes'].append({'code': field_data[1], # noqa 'doc': text}) self.status_codes_set.add(field_data[1]) # Request Headers if field_data[0] in self.request_header_roles: if field_data[1] not in self.reqheaders_set: self.data['reqheaders'].append({'name': field_data[1], 'doc': text}) self.reqheaders_set.add(field_data[1]) # Response Headers if field_data[0] in self.response_header_roles: if field_data[1] not in self.resheaders_set: resheader = {'name': field_data[1], 'doc': text} self.data['resheaders'].append(resheader) self.resheaders_set.add(field_data[1]) if resheader['name'] == 'Content-Type' and \ resheader['doc'] == 'application/octet-stream': self.data['return_type'] = 'octet stream' def visit_paragraph(self, node): """ Visit relevant paragraphs to parse """ # only parsed top level paragraphs if isinstance(node.parent, docutils.nodes.block_quote): text = self.process_paragraph(str(node)) # endpoint description if not text.startswith('**') and self.data['description'] != text: self.data['description'] += '\n\n' if self.data['description'] else '' # noqa self.data['description'] += text # http methods elif text.startswith('**Allowed HTTP Methods:**'): text = text.replace('**Allowed HTTP Methods:**', '') http_methods = text.strip().split(',') http_methods = [m[m.find('`')+1:-1].upper() for m in http_methods] self.data['urls'].append({'rule': self.urls[self.url_idx], 'methods': http_methods}) self.url_idx += 1 def visit_literal_block(self, node): """ - Visit litteral blocks + Visit literal blocks """ text = node.astext() - # litteral block in endpoint description + # literal block in endpoint description if not self.field_list_visited: self.data['description'] += ':\n\n\t%s' % text # extract example url if ':swh_web_api:' in text: self.data['examples'].append( '/api/1/' + re.sub('.*`(.*)`.*', r'\1', text)) def visit_bullet_list(self, node): # bullet list in endpoint description if not self.field_list_visited: self.data['description'] += '\n\n' for child in node.traverse(): # process list item if isinstance(child, docutils.nodes.paragraph): line_text = self.process_paragraph(str(child)) self.data['description'] += '\t* %s\n' % line_text def unknown_visit(self, node): pass def depart_document(self, node): """ End of parsing extra processing """ default_methods = ['GET', 'HEAD', 'OPTIONS'] # ensure urls info is present and set default http methods if not self.data['urls']: for url in self.urls: self.data['urls'].append({'rule': url, 'methods': default_methods}) def unknown_departure(self, node): pass def _parse_httpdomain_doc(doc, data): doc_lines = doc.split('\n') doc_lines_filtered = [] urls = [] # httpdomain is a sphinx extension that is unknown to docutils but # fortunately we can still parse its directives' content, # so remove lines with httpdomain directives before executing the # rst parser from docutils for doc_line in doc_lines: if '.. http' not in doc_line: doc_lines_filtered.append(doc_line) else: url = doc_line[doc_line.find('/'):] # emphasize url arguments for html rendering url = re.sub(r'\((\w+)\)', r' **\(\1\)** ', url) urls.append(url) - # parse the rst doctring and do not print system messages about + # parse the rst docstring and do not print system messages about # unknown httpdomain roles document = parse_rst('\n'.join(doc_lines_filtered), report_level=5) # remove the system_message nodes from the parsed document for node in document.traverse(docutils.nodes.system_message): node.parent.remove(node) # visit the document nodes to extract relevant endpoint info visitor = _HTTPDomainDocVisitor(document, urls, data) document.walkabout(visitor) class APIDocException(Exception): """ Custom exception to signal errors in the use of the APIDoc decorators """ class api_doc(object): # noqa: N801 """ Decorate an API function to register it in the API doc route index and create the corresponding DRF route. Args: route (str): documentation page's route noargs (boolean): set to True if the route has no arguments, and its result should be displayed anytime its documentation is requested. Default to False tags (list): Further information on api endpoints. Two values are possibly expected: * hidden: remove the entry points from the listing * upcoming: display the entry point but it is not followable handle_response (boolean): indicate if the decorated function takes care of creating the HTTP response or delegates that task to the apiresponse module api_version (str): api version string """ def __init__(self, route, noargs=False, tags=[], handle_response=False, api_version='1'): super().__init__() self.route = route self.urlpattern = '^' + api_version + route + '$' self.noargs = noargs self.tags = set(tags) self.handle_response = handle_response # @api_doc() Decorator call def __call__(self, f): # If the route is not hidden, add it to the index if 'hidden' not in self.tags: doc_data = self.get_doc_data(f) doc_desc = doc_data['description'] first_dot_pos = doc_desc.find('.') APIUrls.add_route(self.route, doc_desc[:first_dot_pos+1], tags=self.tags) # If the decorated route has arguments, we create a specific # documentation view if not self.noargs: @api_view(['GET', 'HEAD']) def doc_view(request): doc_data = self.get_doc_data(f) return make_api_response(request, None, doc_data) view_name = 'api-%s' % self.route[1:-1].replace('/', '-') APIUrls.add_url_pattern(self.urlpattern, doc_view, view_name) @wraps(f) def documented_view(request, **kwargs): doc_data = self.get_doc_data(f) try: response = f(request, **kwargs) except Exception as exc: return error_response(request, exc, doc_data) if self.handle_response: return response else: return make_api_response(request, response, doc_data) return documented_view @functools.lru_cache(maxsize=32) def get_doc_data(self, f): """ Build documentation data for the decorated api endpoint function """ data = { 'description': '', 'response_data': None, 'urls': [], 'args': [], 'params': [], 'resheaders': [], 'reqheaders': [], 'return_type': '', 'returns': [], 'status_codes': [], 'examples': [], 'route': self.route, 'noargs': self.noargs } if not f.__doc__: raise APIDocException('apidoc %s: expected a docstring' ' for function %s' % (self.__class__.__name__, f.__name__)) # use raw docstring as endpoint documentation if sphinx # httpdomain is not used if '.. http' not in f.__doc__: data['description'] = f.__doc__ # else parse the sphinx httpdomain docstring with docutils # (except when building the swh-web documentation through autodoc # sphinx extension, not needed and raise errors with sphinx >= 1.7) elif 'SWH_WEB_DOC_BUILD' not in os.environ: _parse_httpdomain_doc(f.__doc__, data) # process returned object info for nicer html display returns_list = '' for ret in data['returns']: returns_list += '\t* **%s (%s)**: %s\n' %\ (ret['name'], ret['type'], ret['doc']) data['returns_list'] = returns_list return data diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index f2e9af46..add77da6 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -1,180 +1,180 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from rest_framework.response import Response from swh.storage.exc import StorageDBError, StorageAPIError from swh.web.common.exc import NotFoundExc, ForbiddenExc from swh.web.common.utils import shorten_path, gen_path_info from swh.web.api import utils def compute_link_header(rv, options): """Add Link header in returned value results. Args: rv (dict): dictionary with keys: - headers: potential headers with 'link-next' and 'link-prev' keys - results: containing the result to return options (dict): the initial dict to update with result if any Returns: dict: dictionary with optional keys 'link-next' and 'link-prev' """ link_headers = [] if 'headers' not in rv: return {} rv_headers = rv['headers'] if 'link-next' in rv_headers: link_headers.append('<%s>; rel="next"' % ( rv_headers['link-next'])) if 'link-prev' in rv_headers: link_headers.append('<%s>; rel="previous"' % ( rv_headers['link-prev'])) if link_headers: link_header_str = ','.join(link_headers) headers = options.get('headers', {}) headers.update({ 'Link': link_header_str }) return headers return {} def filter_by_fields(request, data): """Extract a request parameter 'fields' if it exists to permit the filtering on - he data dict's keys. + the data dict's keys. If such field is not provided, returns the data as is. """ fields = request.query_params.get('fields') if fields: fields = set(fields.split(',')) data = utils.filter_field_keys(data, fields) return data def transform(rv): """Transform an eventual returned value with multiple layer of information with only what's necessary. If the returned value rv contains the 'results' key, this is the associated value which is returned. Otherwise, return the initial dict without the potential 'headers' key. """ if 'results' in rv: return rv['results'] if 'headers' in rv: rv.pop('headers') return rv def make_api_response(request, data, doc_data={}, options={}): """Generates an API response based on the requested mimetype. Args: request: a DRF Request object data: raw data to return in the API response doc_data: documentation data for HTML response options: optional data that can be used to generate the response Returns: a DRF Response a object """ if data: options['headers'] = compute_link_header(data, options) data = transform(data) data = filter_by_fields(request, data) doc_env = doc_data headers = {} if 'headers' in options: doc_env['headers_data'] = options['headers'] headers = options['headers'] # get request status code doc_env['status_code'] = options.get('status', 200) response_args = {'status': doc_env['status_code'], 'headers': headers, 'content_type': request.accepted_media_type} # when requesting HTML, typically when browsing the API through its # documented views, we need to enrich the input data with documentation # related ones and inform DRF that we request HTML template rendering if request.accepted_media_type == 'text/html': if data: data = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')) doc_env['response_data'] = data doc_env['request'] = { 'path': request.path, 'method': request.method, 'absolute_uri': request.build_absolute_uri(), } doc_env['heading'] = shorten_path(str(request.path)) if 'route' in doc_env: doc_env['endpoint_path'] = gen_path_info(doc_env['route']) response_args['data'] = doc_env response_args['template_name'] = 'api/apidoc.html' # otherwise simply return the raw data and let DRF picks # the correct renderer (JSON or YAML) else: response_args['data'] = data return Response(**response_args) def error_response(request, error, doc_data): """Private function to create a custom error response. Args: request: a DRF Request object error: the exception that caused the error doc_data: documentation data for HTML response """ error_code = 400 if isinstance(error, NotFoundExc): error_code = 404 elif isinstance(error, ForbiddenExc): error_code = 403 elif isinstance(error, StorageDBError): error_code = 503 elif isinstance(error, StorageAPIError): error_code = 503 error_opts = {'status': error_code} error_data = { 'exception': error.__class__.__name__, 'reason': str(error), } return make_api_response(request, error_data, doc_data, options=error_opts) diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py index d7b4e41d..bf694908 100644 --- a/swh/web/api/views/origin_save.py +++ b/swh/web/api/views/origin_save.py @@ -1,79 +1,79 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.views.decorators.cache import never_cache from swh.web.api.apidoc import api_doc from swh.web.api.apiurls import api_route from swh.web.common.origin_save import ( create_save_origin_request, get_save_origin_requests ) @api_route(r'/origin/save/(?P.+)/url/(?P.+)/', 'api-save-origin', methods=['GET', 'POST'], throttle_scope='swh_save_origin') @never_cache @api_doc('/origin/save/') def api_save_origin(request, origin_type, origin_url): """ .. http:get:: /api/1/origin/save/(origin_type)/url/(origin_url)/ .. http:post:: /api/1/origin/save/(origin_type)/url/(origin_url)/ Request the saving of a software origin into the archive or check the status of previously created save requests. That endpoint enables to create a saving task for a software origin through a POST request. Depending of the provided origin url, the save request can either be: - * immediately **accepted**, for well kwown code hosting providers + * immediately **accepted**, for well known code hosting providers like for instance GitHub or GitLab * **rejected**, in case the url is blacklisted by Software Heritage * **put in pending state** until a manual check is done in order to determine if it can be loaded or not Once a saving request has been accepted, its associated saving task status can then be checked through a GET request on the same url. Returned status can either be: * **not created**: no saving task has been created * **not yet scheduled**: saving task has been created but its execution has not yet been scheduled * **scheduled**: the task execution has been scheduled * **succeed**: the saving task has been successfully executed * **failed**: the saving task has been executed but it failed When issuing a POST request an object will be returned while a GET request will return an array of objects (as multiple save requests might have been submitted for the same origin). :param string origin_type: the type of origin to save (currently only *git* but *hg* and *svn* will soon be available) :param string origin_url: the url of the origin to save :reqheader Accept: the requested response content type, either *application/json* (default) or *application/yaml* :resheader Content-Type: this depends on :http:header:`Accept` header of request :>json string origin_url: the url of the origin to save :>json string origin_type: the type of the origin to save :>json string save_request_date: the date (in iso format) the save request was issued :>json string save_request_status: the status of the save request, either *accepted*, *rejected* or *pending* :>json string save_task_status: the status of the origin saving task, either *not created*, *not yet scheduled*, *scheduled*, *succeed* or *failed* **Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, :http:method:`head`, :http:method:`options` :statuscode 200: no error :statuscode 400: an invalid origin type or url has been provided :statuscode 403: the provided origin url is blacklisted """ # noqa if request.method == 'POST': return create_save_origin_request(origin_type, origin_url) else: return get_save_origin_requests(origin_type, origin_url) diff --git a/swh/web/assets/src/bundles/browse/origin-save.js b/swh/web/assets/src/bundles/browse/origin-save.js index abb7f06d..9444b3c8 100644 --- a/swh/web/assets/src/bundles/browse/origin-save.js +++ b/swh/web/assets/src/bundles/browse/origin-save.js @@ -1,196 +1,196 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {handleFetchError, csrfPost, isGitRepoUrl} from 'utils/functions'; import {validate} from 'validate.js'; let saveRequestsTable; export function initOriginSave() { $(document).ready(() => { $.fn.dataTable.ext.errMode = 'throw'; fetch(Urls.browse_origin_save_types_list()) .then(response => response.json()) .then(data => { for (let originType of data) { $('#swh-input-origin-type').append(``); } }); saveRequestsTable = $('#swh-origin-save-requests').DataTable({ serverSide: true, ajax: Urls.browse_origin_save_requests_list('all'), columns: [ { data: 'save_request_date', name: 'request_date', render: (data, type, row) => { if (type === 'display') { let date = new Date(data); return date.toLocaleString(); } return data; } }, { data: 'origin_type', name: 'origin_type' }, { data: 'origin_url', name: 'origin_url', render: (data, type, row) => { if (type === 'display') { return `${data}`; } return data; } }, { data: 'save_request_status', name: 'status' }, { data: 'save_task_status', name: 'loading_task_status', render: (data, type, row) => { if (data === 'succeed') { let browseOriginUrl = Urls.browse_origin(row.origin_url); if (row.visit_date) { browseOriginUrl += `visit/${row.visit_date}/`; } return `${data}`; } return data; } } ], scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']] }); $('#swh-origin-save-requests-list-tab').on('shown.bs.tab', () => { saveRequestsTable.draw(); }); - let saveRequestAcceptedALert = + let saveRequestAcceptedAlert = ``; let saveRequestPendingAlert = ``; let saveRequestRejectedAlert = ``; $('#swh-save-origin-form').submit(event => { event.preventDefault(); event.stopPropagation(); $('.alert').alert('close'); if (event.target.checkValidity()) { $(event.target).removeClass('was-validated'); let originType = $('#swh-input-origin-type').val(); let originUrl = $('#swh-input-origin-url').val(); let addSaveOriginRequestUrl = Urls.browse_origin_save_request(originType, originUrl); let grecaptchaData = {'g-recaptcha-response': grecaptcha.getResponse()}; let headers = { 'Accept': 'application/json', 'Content-Type': 'application/json' }; let body = JSON.stringify(grecaptchaData); csrfPost(addSaveOriginRequestUrl, headers, body) .then(handleFetchError) .then(response => response.json()) .then(data => { if (data.save_request_status === 'accepted') { - $('#swh-origin-save-request-status').html(saveRequestAcceptedALert); + $('#swh-origin-save-request-status').html(saveRequestAcceptedAlert); } else { $('#swh-origin-save-request-status').html(saveRequestPendingAlert); } grecaptcha.reset(); }) .catch(response => { if (response.status === 403) { $('#swh-origin-save-request-status').css('color', 'red'); $('#swh-origin-save-request-status').html(saveRequestRejectedAlert); } grecaptcha.reset(); }); } else { $(event.target).addClass('was-validated'); } }); $('#swh-show-origin-save-requests-list').on('click', (event) => { event.preventDefault(); $('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show'); }); $('#swh-input-origin-url').on('input', function(event) { let originUrl = $(this).val().trim(); $(this).val(originUrl); $('#swh-input-origin-type option').each(function() { let val = $(this).val(); if (val && originUrl.includes(val)) { $(this).prop('selected', true); } }); }); }); } export function validateSaveOriginUrl(input) { let validUrl = validate({website: input.value}, { website: { url: { schemes: ['http', 'https', 'svn', 'git'] } } }) === undefined; let originType = $('#swh-input-origin-type').val(); if (originType === 'git' && validUrl) { // additional checks for well known code hosting providers let githubIdx = input.value.indexOf('://github.com'); let gitlabIdx = input.value.indexOf('://gitlab.'); let gitSfIdx = input.value.indexOf('://git.code.sf.net'); let bitbucketIdx = input.value.indexOf('://bitbucket.org'); if (githubIdx !== -1 && githubIdx <= 5) { validUrl = isGitRepoUrl(input.value, 'github.com'); } else if (gitlabIdx !== -1 && gitlabIdx <= 5) { let startIdx = gitlabIdx + 3; let idx = input.value.indexOf('/', startIdx); if (idx !== -1) { let gitlabDomain = input.value.substr(startIdx, idx - startIdx); // GitLab repo url needs to be suffixed by '.git' in order to be successfully loaded validUrl = isGitRepoUrl(input.value, gitlabDomain) && input.value.endsWith('.git'); } else { validUrl = false; } } else if (gitSfIdx !== -1 && gitSfIdx <= 5) { validUrl = isGitRepoUrl(input.value, 'git.code.sf.net/p'); } else if (bitbucketIdx !== -1 && bitbucketIdx <= 5) { validUrl = isGitRepoUrl(input.value, 'bitbucket.org'); } } if (validUrl) { input.setCustomValidity(''); } else { input.setCustomValidity('The origin url is not valid or does not reference a code repository'); } } diff --git a/swh/web/assets/src/bundles/browse/origin-search.js b/swh/web/assets/src/bundles/browse/origin-search.js index 6e662280..c822f6fb 100644 --- a/swh/web/assets/src/bundles/browse/origin-search.js +++ b/swh/web/assets/src/bundles/browse/origin-search.js @@ -1,248 +1,248 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {heapsPermute} from 'utils/heaps-permute'; import {handleFetchError} from 'utils/functions'; let originPatterns; let perPage = 100; let limit = perPage * 2; let offset = 0; let currentData = null; let inSearch = false; function fixTableRowsStyle() { setTimeout(() => { $('#origin-search-results tbody tr').removeAttr('style'); }); } function clearOriginSearchResultsTable() { $('#origin-search-results tbody tr').remove(); } function populateOriginSearchResultsTable(data, offset) { let localOffset = offset % limit; if (data.length > 0) { $('#swh-origin-search-results').show(); $('#swh-no-result').hide(); clearOriginSearchResultsTable(); let table = $('#origin-search-results tbody'); for (let i = localOffset; i < localOffset + perPage && i < data.length; ++i) { let elem = data[i]; let tableRow = ''; tableRow += '' + elem.type + ''; let browseUrl = Urls.browse_origin(elem.url); tableRow += '' + elem.url + ''; tableRow += ''; tableRow += ''; table.append(tableRow); // get async latest visit snapshot and update visit status icon let latestSnapshotUrl = Urls.browse_origin_latest_snapshot(elem.id); fetch(latestSnapshotUrl) .then(response => response.json()) .then(data => { let originId = elem.id; $('#visit-status-origin-' + originId).children().remove(); if (data) { $('#visit-status-origin-' + originId).append(''); } else { $('#visit-status-origin-' + originId).append(''); } }); } fixTableRowsStyle(); } else { $('#swh-origin-search-results').hide(); $('#swh-no-result').text('No origins matching the search criteria were found.'); $('#swh-no-result').show(); } if (data.length - localOffset < perPage || (data.length < limit && (localOffset + perPage) === data.length)) { $('#origins-next-results-button').addClass('disabled'); } else { $('#origins-next-results-button').removeClass('disabled'); } if (offset > 0) { $('#origins-prev-results-button').removeClass('disabled'); } else { $('#origins-prev-results-button').addClass('disabled'); } inSearch = false; if (typeof Storage !== 'undefined') { sessionStorage.setItem('last-swh-origin-search-offset', offset); } setTimeout(() => { window.scrollTo(0, 0); }); } function escapeStringRegexp(str) { let matchOperatorsRe = /[|\\{}()[\]^$+*?.]/g; return str.replace(matchOperatorsRe, '\\\\\\$&'); } function searchOrigins(patterns, limit, searchOffset, offset) { originPatterns = patterns; let patternsArray = patterns.trim().replace(/\s+/g, ' ').split(' '); for (let i = 0; i < patternsArray.length; ++i) { patternsArray[i] = escapeStringRegexp(patternsArray[i]); } let patternsPermut = []; heapsPermute(patternsArray, p => patternsPermut.push(p.join('.*'))); let regex = patternsPermut.join('|'); let withVisit = $('#swh-search-origins-with-visit').prop('checked'); let searchUrl = Urls.browse_origin_search(regex) + `?limit=${limit}&offset=${searchOffset}®exp=true&with_visit=${withVisit}`; clearOriginSearchResultsTable(); $('.swh-loading').addClass('show'); fetch(searchUrl) .then(handleFetchError) .then(response => response.json()) .then(data => { currentData = data; if (typeof Storage !== 'undefined') { sessionStorage.setItem('last-swh-origin-url-patterns', patterns); sessionStorage.setItem('last-swh-origin-search-results', JSON.stringify(data)); sessionStorage.setItem('last-swh-origin-search-offset', offset); } $('.swh-loading').removeClass('show'); populateOriginSearchResultsTable(data, offset); }) .catch(response => { $('.swh-loading').removeClass('show'); inSearch = false; $('#swh-origin-search-results').hide(); $('#swh-no-result').text(`Error ${response.status}: ${response.statusText}`); $('#swh-no-result').show(); }); } function doSearch() { $('#swh-no-result').hide(); let patterns = $('#origins-url-patterns').val(); offset = 0; inSearch = true; // first try to resolve a swh persistent identifier let resolvePidUrl = Urls.api_resolve_swh_pid(patterns); fetch(resolvePidUrl) .then(handleFetchError) .then(response => response.json()) .then(data => { // pid has been successfully resolved, // so redirect to browse page window.location = data.browse_url; }) .catch(response => { // pid resolving failed if (patterns.startsWith('swh:')) { // display a useful error message if the input // looks like a swh pid response.json().then(data => { $('#swh-origin-search-results').hide(); $('.swh-search-pagination').hide(); $('#swh-no-result').text(data.reason); $('#swh-no-result').show(); }); } else { // otherwise, proceed with origins search $('#swh-origin-search-results').show(); $('.swh-search-pagination').show(); searchOrigins(patterns, limit, offset, offset); } }); } export function initOriginSearch() { $(document).ready(() => { if (typeof Storage !== 'undefined') { originPatterns = sessionStorage.getItem('last-swh-origin-url-patterns'); let data = sessionStorage.getItem('last-swh-origin-search-results'); offset = sessionStorage.getItem('last-swh-origin-search-offset'); if (data) { $('#origins-url-patterns').val(originPatterns); offset = parseInt(offset); currentData = JSON.parse(data); populateOriginSearchResultsTable(currentData, offset); } let withVisit = sessionStorage.getItem('last-swh-origin-with-visit'); if (withVisit !== null) { $('#swh-search-origins-with-visit').prop('checked', JSON.parse(withVisit)); } } $('#swh-search-origins').submit(event => { event.preventDefault(); let patterns = $('#origins-url-patterns').val().trim(); if (typeof Storage !== 'undefined') { sessionStorage.setItem('last-swh-origin-url-patterns', patterns); sessionStorage.setItem('last-swh-origin-search-results', ''); sessionStorage.setItem('last-swh-origin-search-offset', ''); } let withVisit = $('#swh-search-origins-with-visit').prop('checked'); let queryParameters = '?q=' + encodeURIComponent(patterns); if (withVisit) { queryParameters += '&with_visit'; } - // Update the url, trigering page reload and effective search + // Update the url, triggering page reload and effective search window.location.search = queryParameters; }); $('#origins-next-results-button').click(event => { if ($('#origins-next-results-button').hasClass('disabled') || inSearch) { return; } inSearch = true; offset += perPage; if (!currentData || (offset >= limit && offset % limit === 0)) { searchOrigins(originPatterns, limit, offset, offset); } else { populateOriginSearchResultsTable(currentData, offset); } event.preventDefault(); }); $('#origins-prev-results-button').click(event => { if ($('#origins-prev-results-button').hasClass('disabled') || inSearch) { return; } inSearch = true; offset -= perPage; if (!currentData || (offset > 0 && (offset + perPage) % limit === 0)) { searchOrigins(originPatterns, limit, (offset + perPage) - limit, offset); } else { populateOriginSearchResultsTable(currentData, offset); } event.preventDefault(); }); $(document).on('shown.bs.tab', 'a[data-toggle="tab"]', e => { if (e.currentTarget.text.trim() === 'Search') { fixTableRowsStyle(); } }); $(window).on('unload', () => { if (typeof Storage !== 'undefined') { sessionStorage.setItem('last-swh-origin-with-visit', JSON.stringify($('#swh-search-origins-with-visit').prop('checked'))); } }); let urlParams = new URLSearchParams(window.location.search); let query = urlParams.get('q'); let withVisit = urlParams.has('with_visit'); let data = sessionStorage.getItem('last-swh-origin-search-results'); if (query && !data) { $('#origins-url-patterns').val(query); if (withVisit) { $('#swh-search-origins-with-visit').prop('checked', true); } doSearch(); } }); } diff --git a/swh/web/assets/src/bundles/origin/visits-histogram.js b/swh/web/assets/src/bundles/origin/visits-histogram.js index 03f75cb9..5c59b92c 100644 --- a/swh/web/assets/src/bundles/origin/visits-histogram.js +++ b/swh/web/assets/src/bundles/origin/visits-histogram.js @@ -1,337 +1,337 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ // Creation of a stacked histogram with D3.js for SWH origin visits history // Parameters description: // - container: selector for the div that will contain the histogram // - visitsData: raw swh origin visits data // - currentYear: the visits year to display by default // - yearClickCallback: callback when the user selects a year through the histogram import * as d3 from 'd3'; export function createVisitsHistogram(container, visitsData, currentYear, yearClickCallback) { - // remove previously created hisogram and tooltip if any + // remove previously created histogram and tooltip if any d3.select(container).select('svg').remove(); d3.select('div.d3-tooltip').remove(); // histogram size and margins let width = 1000; let height = 300; let margin = {top: 20, right: 80, bottom: 30, left: 50}; // create responsive svg let svg = d3.select(container) .attr('style', 'padding-bottom: ' + Math.ceil(height * 100 / width) + '%') .append('svg') .attr('viewBox', '0 0 ' + width + ' ' + height); // create tooltip div let tooltip = d3.select('body') .append('div') .attr('class', 'd3-tooltip') .style('opacity', 0); // update width and height without margins width = width - margin.left - margin.right; height = height - margin.top - margin.bottom; // create main svg group element let g = svg.append('g').attr('transform', 'translate(' + margin.left + ',' + margin.top + ')'); // create x scale let x = d3.scaleTime().rangeRound([0, width]); // create y scale let y = d3.scaleLinear().range([height, 0]); - // create oridinal colorscale mapping visit status + // create ordinal colorscale mapping visit status let colors = d3.scaleOrdinal() .domain(['full', 'partial', 'failed', 'ongoing']) .range(['#008000', '#edc344', '#ff0000', '#0000ff']); // first SWH crawls were made in 2015 let startYear = 2015; // set latest display year as the current one let now = new Date(); let endYear = now.getUTCFullYear() + 1; let monthExtent = [new Date(Date.UTC(startYear, 0, 1)), new Date(Date.UTC(endYear, 0, 1))]; // create months bins based on setup extent let monthBins = d3.timeMonths(d3.timeMonth.offset(monthExtent[0], -1), monthExtent[1]); // create years bins based on setup extent let yearBins = d3.timeYears(monthExtent[0], monthExtent[1]); // set x scale domain x.domain(d3.extent(monthBins)); // use D3 histogram layout to create a function that will bin the visits by month let binByMonth = d3.histogram() .value(d => d.date) .domain(x.domain()) .thresholds(monthBins); // use D3 nest function to group the visits by status let visitsByStatus = d3.nest() .key(d => d['status']) .sortKeys(d3.ascending) .entries(visitsData); // prepare data in order to be able to stack visit statuses by month let statuses = []; let histData = []; for (let i = 0; i < monthBins.length; ++i) { histData[i] = {}; } visitsByStatus.forEach(entry => { statuses.push(entry.key); let monthsData = binByMonth(entry.values); for (let i = 0; i < monthsData.length; ++i) { histData[i]['x0'] = monthsData[i]['x0']; histData[i]['x1'] = monthsData[i]['x1']; histData[i][entry.key] = monthsData[i]; } }); // create function to stack visits statuses by month let stacked = d3.stack() .keys(statuses) .value((d, key) => d[key].length); // compute the maximum amount of visits by month let yMax = d3.max(histData, d => { let total = 0; for (let i = 0; i < statuses.length; ++i) { total += d[statuses[i]].length; } return total; }); // set y scale domain y.domain([0, yMax]); // compute ticks values for the y axis let step = 5; let yTickValues = []; for (let i = 0; i <= yMax / step; ++i) { yTickValues.push(i * step); } if (yTickValues.length === 0) { for (let i = 0; i <= yMax; ++i) { yTickValues.push(i); } } else if (yMax % step !== 0) { yTickValues.push(yMax); } // add histogram background grid g.append('g') .attr('class', 'grid') .call(d3.axisLeft(y) .tickValues(yTickValues) .tickSize(-width) .tickFormat('')); // create one fill only rectangle by displayed year // each rectangle will be made visible when hovering the mouse over a year range // user will then be able to select a year by clicking in the rectangle g.append('g') .selectAll('rect') .data(yearBins) .enter().append('rect') .attr('class', d => 'year' + d.getUTCFullYear()) .attr('fill', 'red') .attr('fill-opacity', d => d.getUTCFullYear() === currentYear ? 0.3 : 0) .attr('stroke', 'none') .attr('x', d => { let date = new Date(Date.UTC(d.getUTCFullYear(), 0, 1)); return x(date); }) .attr('y', 0) .attr('height', height) .attr('width', d => { let date = new Date(Date.UTC(d.getUTCFullYear(), 0, 1)); let yearWidth = x(d3.timeYear.offset(date, 1)) - x(date); return yearWidth; }) // mouse event callbacks used to show rectangle years // when hovering the mouse over the histograms .on('mouseover', d => { svg.selectAll('rect.year' + d.getUTCFullYear()) .attr('fill-opacity', 0.5); }) .on('mouseout', d => { svg.selectAll('rect.year' + d.getUTCFullYear()) .attr('fill-opacity', 0); svg.selectAll('rect.year' + currentYear) .attr('fill-opacity', 0.3); }) // callback to select a year after a mouse click // in a rectangle year .on('click', d => { svg.selectAll('rect.year' + currentYear) .attr('fill-opacity', 0); svg.selectAll('rect.yearoutline' + currentYear) .attr('stroke', 'none'); currentYear = d.getUTCFullYear(); svg.selectAll('rect.year' + currentYear) .attr('fill-opacity', 0.5); svg.selectAll('rect.yearoutline' + currentYear) .attr('stroke', 'black'); yearClickCallback(currentYear); }); // create the stacked histogram of visits g.append('g') .selectAll('g') .data(stacked(histData)) .enter().append('g') .attr('fill', d => colors(d.key)) .selectAll('rect') .data(d => d) .enter().append('rect') .attr('class', d => 'month' + d.data.x1.getMonth()) .attr('x', d => x(d.data.x0)) .attr('y', d => y(d[1])) .attr('height', d => y(d[0]) - y(d[1])) .attr('width', d => x(d.data.x1) - x(d.data.x0) - 1) // mouse event callbacks used to show rectangle years - // but also to show tooltips when hovering the mouse + // but also to show tooltip when hovering the mouse // over the histogram bars .on('mouseover', d => { svg.selectAll('rect.year' + d.data.x1.getUTCFullYear()) .attr('fill-opacity', 0.5); tooltip.transition() .duration(200) .style('opacity', 1); let ds = d.data.x1.toISOString().substr(0, 7).split('-'); let tooltipText = '' + ds[1] + ' / ' + ds[0] + ':
'; for (let i = 0; i < statuses.length; ++i) { let visitStatus = statuses[i]; let nbVisits = d.data[visitStatus].length; if (nbVisits === 0) continue; tooltipText += nbVisits + ' ' + visitStatus + ' visits'; if (i !== statuses.length - 1) tooltipText += '
'; } tooltip.html(tooltipText) .style('left', d3.event.pageX + 15 + 'px') .style('top', d3.event.pageY + 'px'); }) .on('mouseout', d => { svg.selectAll('rect.year' + d.data.x1.getUTCFullYear()) .attr('fill-opacity', 0); svg.selectAll('rect.year' + currentYear) .attr('fill-opacity', 0.3); tooltip.transition() .duration(500) .style('opacity', 0); }) .on('mousemove', () => { tooltip.style('left', d3.event.pageX + 15 + 'px') .style('top', d3.event.pageY + 'px'); }) // callback to select a year after a mouse click // inside a histogram bar .on('click', d => { svg.selectAll('rect.year' + currentYear) .attr('fill-opacity', 0); svg.selectAll('rect.yearoutline' + currentYear) .attr('stroke', 'none'); currentYear = d.data.x1.getUTCFullYear(); svg.selectAll('rect.year' + currentYear) .attr('fill-opacity', 0.5); svg.selectAll('rect.yearoutline' + currentYear) .attr('stroke', 'black'); yearClickCallback(currentYear); }); // create one stroke only rectangle by displayed year // that will be displayed on top of the histogram when the user has selected a year g.append('g') .selectAll('rect') .data(yearBins) .enter().append('rect') .attr('class', d => 'yearoutline' + d.getUTCFullYear()) .attr('fill', 'none') .attr('stroke', d => d.getUTCFullYear() === currentYear ? 'black' : 'none') .attr('x', d => { let date = new Date(Date.UTC(d.getUTCFullYear(), 0, 1)); return x(date); }) .attr('y', 0) .attr('height', height) .attr('width', d => { let date = new Date(Date.UTC(d.getUTCFullYear(), 0, 1)); let yearWidth = x(d3.timeYear.offset(date, 1)) - x(date); return yearWidth; }); // add x axis with a tick for every 1st day of each year let xAxis = g.append('g') .attr('class', 'axis') .attr('transform', 'translate(0,' + height + ')') .call( d3.axisBottom(x) .ticks(d3.timeYear.every(1)) .tickFormat(d => d.getUTCFullYear()) ); // shift tick labels in order to display them at the middle // of each year range xAxis.selectAll('text') .attr('transform', d => { let year = d.getUTCFullYear(); let date = new Date(Date.UTC(year, 0, 1)); let yearWidth = x(d3.timeYear.offset(date, 1)) - x(date); return 'translate(' + -yearWidth / 2 + ', 0)'; }); // add y axis for the number of visits g.append('g') .attr('class', 'axis') .call(d3.axisLeft(y).tickValues(yTickValues)); // add legend for visit statuses let legendGroup = g.append('g') .attr('font-family', 'sans-serif') .attr('font-size', 10) .attr('text-anchor', 'end'); legendGroup.append('text') .attr('x', width + margin.right - 5) .attr('y', 9.5) .attr('dy', '0.32em') .text('visit status:'); let legend = legendGroup.selectAll('g') .data(statuses.slice().reverse()) .enter().append('g') .attr('transform', (d, i) => 'translate(0,' + (i + 1) * 20 + ')'); legend.append('rect') .attr('x', width + 2 * margin.right / 3) .attr('width', 19) .attr('height', 19) .attr('fill', colors); legend.append('text') .attr('x', width + 2 * margin.right / 3 - 5) .attr('y', 9.5) .attr('dy', '0.32em') .text(d => d); // add text label for the y axis g.append('text') .attr('transform', 'rotate(-90)') .attr('y', -margin.left) .attr('x', -(height / 2)) .attr('dy', '1em') .style('text-anchor', 'middle') .text('Number of visits'); } diff --git a/swh/web/assets/src/bundles/revision/diff-utils.js b/swh/web/assets/src/bundles/revision/diff-utils.js index 3cc80f18..39f2a964 100644 --- a/swh/web/assets/src/bundles/revision/diff-utils.js +++ b/swh/web/assets/src/bundles/revision/diff-utils.js @@ -1,515 +1,515 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import 'waypoints/lib/jquery.waypoints'; import {staticAsset} from 'utils/functions'; // path to static spinner asset let swhSpinnerSrc = staticAsset('img/swh-spinner.gif'); // number of changed files in the revision let changes = null; let nbChangedFiles = 0; // to track the number of already computed files diffs let nbDiffsComputed = 0; // the no newline at end of file marker from Github let noNewLineMarker = ` `; // to track the total number of added lines in files diffs let nbAdditions = 0; // to track the total number of deleted lines in files diffs let nbDeletions = 0; // to track the already computed diffs by id let computedDiffs = {}; // map a diff id to its computation url let diffsUrls = {}; // to check if a DOM element is in the viewport function isInViewport(elt) { let elementTop = $(elt).offset().top; let elementBottom = elementTop + $(elt).outerHeight(); let viewportTop = $(window).scrollTop(); let viewportBottom = viewportTop + $(window).height(); return elementBottom > viewportTop && elementTop < viewportBottom; } // to format the diffs line numbers function formatDiffLineNumbers(fromLine, toLine, maxNumberChars) { let ret = ''; if (fromLine != null) { for (let i = 0; i < (maxNumberChars - fromLine.length); ++i) { ret += ' '; } ret += fromLine; } if (fromLine != null && toLine != null) { ret += ' '; } if (toLine != null) { for (let i = 0; i < (maxNumberChars - toLine.length); ++i) { ret += ' '; } ret += toLine; } return ret; } // to compute diff and process it for display export function computeDiff(diffUrl, diffId) { // force diff computation ? let force = diffUrl.indexOf('force=true') !== -1; // it no forced computation and diff already computed, do nothing if (!force && computedDiffs.hasOwnProperty(diffId)) { return; } // mark diff computation as already requested computedDiffs[diffId] = true; $(`#${diffId}-loading`).css('visibility', 'visible'); // set spinner visible while requesting diff $(`#${diffId}-loading`).css('display', 'block'); $(`#${diffId}-highlightjs`).css('display', 'none'); // request diff computation and process it fetch(diffUrl) .then(response => response.json()) .then(data => { // increment number of computed diffs ++nbDiffsComputed; // toggle the 'Compute all diffs' button if all diffs have been computed if (nbDiffsComputed === changes.length) { $('#swh-compute-all-diffs').addClass('active'); } // Large diff (> threshold) are not automatically computed, // add a button to force its computation if (data.diff_str.indexOf('Large diff') === 0) { $(`#${diffId}`)[0].innerHTML = data.diff_str + `
`; setDiffVisible(diffId); } else if (data.diff_str.indexOf('@@') !== 0) { $(`#${diffId}`).text(data.diff_str); setDiffVisible(diffId); } else { // prepare code highlighting $(`.${diffId}`).removeClass('nohighlight'); $(`.${diffId}`).addClass(data.language); // set unified diff text $(`#${diffId}`).text(data.diff_str); // code highlighting for unified diff $(`#${diffId}`).each((i, block) => { hljs.highlightBlock(block); hljs.lineNumbersBlock(block); }); // hljs.lineNumbersBlock is asynchronous so we have to postpone our // next treatments by adding it at the end of the current js events queue setTimeout(() => { // process unified diff lines in order to generate side-by-side diffs text - // but also compute line numbers for unified and side-by-side difss + // but also compute line numbers for unified and side-by-side diffs let linesInfoRegExp = new RegExp(/^@@ -(\d+),(\d+) \+(\d+),(\d+) @@$/gm); let baseFromLine = ''; let baseToLine = ''; let fromToLines = []; let fromLines = []; let toLines = []; let maxNumberChars = 0; let diffFromStr = ''; let diffToStr = ''; let linesOffset = 0; $(`#${diffId} .hljs-ln-numbers`).each((i, lnElt) => { let lnText = lnElt.nextSibling.innerText; let linesInfo = linesInfoRegExp.exec(lnText); let fromLine = ''; let toLine = ''; // parsed lines info from the diff output if (linesInfo) { baseFromLine = parseInt(linesInfo[1]) - 1; baseToLine = parseInt(linesInfo[3]) - 1; linesOffset = 0; diffFromStr += (lnText + '\n'); diffToStr += (lnText + '\n'); fromLines.push(''); toLines.push(''); // line removed in the from file } else if (lnText.length > 0 && lnText[0] === '-') { baseFromLine = baseFromLine + 1; fromLine = baseFromLine.toString(); fromLines.push(fromLine); ++nbDeletions; diffFromStr += (lnText + '\n'); ++linesOffset; // line added in the from file } else if (lnText.length > 0 && lnText[0] === '+') { baseToLine = baseToLine + 1; toLine = baseToLine.toString(); toLines.push(toLine); ++nbAdditions; diffToStr += (lnText + '\n'); --linesOffset; // line present in both files } else { baseFromLine = baseFromLine + 1; baseToLine = baseToLine + 1; fromLine = baseFromLine.toString(); toLine = baseToLine.toString(); for (let j = 0; j < Math.abs(linesOffset); ++j) { if (linesOffset > 0) { diffToStr += '\n'; toLines.push(''); } else { diffFromStr += '\n'; fromLines.push(''); } } linesOffset = 0; diffFromStr += (lnText + '\n'); diffToStr += (lnText + '\n'); toLines.push(toLine); fromLines.push(fromLine); } if (!baseFromLine) { fromLine = ''; } if (!baseToLine) { toLine = ''; } fromToLines[i] = [fromLine, toLine]; maxNumberChars = Math.max(maxNumberChars, fromLine.length); maxNumberChars = Math.max(maxNumberChars, toLine.length); }); // set side-by-side diffs text $(`#${diffId}-from`).text(diffFromStr); $(`#${diffId}-to`).text(diffToStr); // code highlighting for side-by-side diffs $(`#${diffId}-from, #${diffId}-to`).each((i, block) => { hljs.highlightBlock(block); hljs.lineNumbersBlock(block); }); // hljs.lineNumbersBlock is asynchronous so we have to postpone our // next treatments by adding it at the end of the current js events queue setTimeout(() => { // diff highlighting for added/removed lines on top of code highlighting $(`.${diffId} .hljs-ln-numbers`).each((i, lnElt) => { let lnText = lnElt.nextSibling.innerText; let linesInfo = linesInfoRegExp.exec(lnText); if (linesInfo) { $(lnElt).parent().addClass('swh-diff-lines-info'); let linesInfoText = $(lnElt).parent().find('.hljs-ln-code .hljs-ln-line').text(); $(lnElt).parent().find('.hljs-ln-code .hljs-ln-line').children().remove(); $(lnElt).parent().find('.hljs-ln-code .hljs-ln-line').text(''); $(lnElt).parent().find('.hljs-ln-code .hljs-ln-line').append(`${linesInfoText}`); } else if (lnText.length > 0 && lnText[0] === '-') { $(lnElt).parent().addClass('swh-diff-removed-line'); } else if (lnText.length > 0 && lnText[0] === '+') { $(lnElt).parent().addClass('swh-diff-added-line'); } }); // set line numbers for unified diff $(`#${diffId} .hljs-ln-numbers`).each((i, lnElt) => { $(lnElt).children().attr('data-line-number', formatDiffLineNumbers(fromToLines[i][0], fromToLines[i][1], maxNumberChars)); }); // set line numbers for the from side-by-side diff $(`#${diffId}-from .hljs-ln-numbers`).each((i, lnElt) => { $(lnElt).children().attr('data-line-number', formatDiffLineNumbers(fromLines[i], null, maxNumberChars)); }); // set line numbers for the to side-by-side diff $(`#${diffId}-to .hljs-ln-numbers`).each((i, lnElt) => { $(lnElt).children().attr('data-line-number', formatDiffLineNumbers(null, toLines[i], maxNumberChars)); }); - // last processings: + // last processing: // - remove the '+' and '-' at the beginning of the diff lines // from code highlighting // - add the "no new line at end of file marker" if needed $(`.${diffId} .hljs-ln-line`).each((i, lnElt) => { if (lnElt.firstChild) { if (lnElt.firstChild.nodeName !== '#text') { let lineText = lnElt.firstChild.innerHTML; if (lineText[0] === '-' || lineText[0] === '+') { lnElt.firstChild.innerHTML = lineText.substr(1); let newTextNode = document.createTextNode(lineText[0]); $(lnElt).prepend(newTextNode); } } $(lnElt).contents().filter((i, elt) => { return elt.nodeType === 3; // Node.TEXT_NODE }).each((i, textNode) => { let swhNoNewLineMarker = '[swh-no-nl-marker]'; if (textNode.textContent.indexOf(swhNoNewLineMarker) !== -1) { textNode.textContent = textNode.textContent.replace(swhNoNewLineMarker, ''); $(lnElt).append($(noNewLineMarker)); } }); } }); // hide the diff mode switch button in case of not generated diffs if (data.diff_str.indexOf('Diffs are not generated for non textual content') !== 0) { $(`#panel_${diffId} .diff-styles`).css('visibility', 'visible'); } setDiffVisible(diffId); }); }); } }); } function setDiffVisible(diffId) { // set the unified diff visible by default $(`#${diffId}-loading`).css('display', 'none'); $(`#${diffId}-highlightjs`).css('display', 'block'); // update displayed counters $('#swh-revision-lines-added').text(`${nbAdditions} additions`); $('#swh-revision-lines-deleted').text(`${nbDeletions} deletions`); $('#swh-nb-diffs-computed').text(nbDiffsComputed); // refresh the waypoints triggering diffs computation as // the DOM layout has been updated Waypoint.refreshAll(); } // to compute all visible diffs in the viewport function computeVisibleDiffs() { $('.swh-file-diff-panel').each((i, elt) => { if (isInViewport(elt)) { let diffId = elt.id.replace('panel_', ''); computeDiff(diffsUrls[diffId], diffId); } }); } function genDiffPanel(diffData) { let diffPanelTitle = diffData.path; if (diffData.type === 'rename') { diffPanelTitle = `${diffData.from_path} → ${diffData.to_path}`; } let diffPanelHtml = `
${diffPanelTitle}
View file
`; return diffPanelHtml; } // setup waypoints to request diffs computation on the fly while scrolling function setupWaypoints() { for (let i = 0; i < changes.length; ++i) { let diffData = changes[i]; // create a waypoint that will trigger diff computation when // the top of the diff panel hits the bottom of the viewport $(`#panel_${diffData.id}`).waypoint({ handler: function() { if (isInViewport(this.element)) { let diffId = this.element.id.replace('panel_', ''); computeDiff(diffsUrls[diffId], diffId); this.destroy(); } }, offset: '100%' }); // create a waypoint that will trigger diff computation when // the bottom of the diff panel hits the top of the viewport $(`#panel_${diffData.id}`).waypoint({ handler: function() { if (isInViewport(this.element)) { let diffId = this.element.id.replace('panel_', ''); computeDiff(diffsUrls[diffId], diffId); this.destroy(); } }, offset: function() { return -$(this.element).height(); } }); } Waypoint.refreshAll(); } // callback to switch from side-by-side diff to unified one export function showUnifiedDiff(event, diffId) { $(`#${diffId}-splitted-diff`).css('display', 'none'); $(`#${diffId}-unified-diff`).css('display', 'block'); } // callback to switch from unified diff to side-by-side one export function showSplittedDiff(event, diffId) { $(`#${diffId}-unified-diff`).css('display', 'none'); $(`#${diffId}-splitted-diff`).css('display', 'block'); } // callback when the user clicks on the 'Compute all diffs' button export function computeAllDiffs(event) { $(event.currentTarget).addClass('active'); for (let diffId in diffsUrls) { if (diffsUrls.hasOwnProperty(diffId)) { computeDiff(diffsUrls[diffId], diffId); } } event.stopPropagation(); } export async function initRevisionDiff(revisionMessageBody, diffRevisionUrl) { await import(/* webpackChunkName: "highlightjs" */ 'utils/highlightjs'); // callback when the 'Changes' tab is activated $(document).on('shown.bs.tab', 'a[data-toggle="tab"]', e => { if (e.currentTarget.text.trim() === 'Changes') { $('#readme-panel').css('display', 'none'); if (changes) { return; } // request computation of revision file changes list // when navigating to the 'Changes' tab and add diff panels // to the DOM when receiving the result fetch(diffRevisionUrl) .then(response => response.json()) .then(data => { changes = data.changes; nbChangedFiles = data.total_nb_changes; let changedFilesText = `${nbChangedFiles} changed file`; if (nbChangedFiles !== 1) { changedFilesText += 's'; } $('#swh-revision-changed-files').text(changedFilesText); $('#swh-total-nb-diffs').text(changes.length); $('#swh-revision-changes-list pre')[0].innerHTML = data.changes_msg; $('#swh-revision-changes-loading').css('display', 'none'); $('#swh-revision-changes-list pre').css('display', 'block'); $('#swh-compute-all-diffs').css('visibility', 'visible'); $('#swh-revision-changes-list').removeClass('in'); if (nbChangedFiles > changes.length) { $('#swh-too-large-revision-diff').css('display', 'block'); $('#swh-nb-loaded-diffs').text(changes.length); } for (let i = 0; i < changes.length; ++i) { let diffData = changes[i]; diffsUrls[diffData.id] = diffData.diff_url; $('#swh-revision-diffs').append(genDiffPanel(diffData)); } setupWaypoints(); computeVisibleDiffs(); }); } else if (e.currentTarget.text.trim() === 'Files') { $('#readme-panel').css('display', 'block'); } }); $(document).ready(() => { if (revisionMessageBody.length > 0) { $('#swh-revision-message').addClass('in'); } else { $('#swh-collapse-revision-message').attr('data-toggle', ''); } let $root = $('html, body'); // callback when the user requests to scroll on a specific diff or back to top $('#swh-revision-changes-list a[href^="#"], #back-to-top a[href^="#"]').click(e => { let href = $.attr(e.currentTarget, 'href'); // disable waypoints while scrolling as we do not want to // launch computation of diffs the user is not interested in // (file changes list can be large) Waypoint.disableAll(); $root.animate( { scrollTop: $(href).offset().top }, { duration: 500, complete: () => { window.location.hash = href; // enable waypoints back after scrolling Waypoint.enableAll(); // compute diffs visible in the viewport computeVisibleDiffs(); } }); return false; }); }); } diff --git a/swh/web/assets/src/bundles/vault/vault-ui.js b/swh/web/assets/src/bundles/vault/vault-ui.js index 529ff50c..5d451c5f 100644 --- a/swh/web/assets/src/bundles/vault/vault-ui.js +++ b/swh/web/assets/src/bundles/vault/vault-ui.js @@ -1,252 +1,252 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {handleFetchError, handleFetchErrors, csrfPost} from 'utils/functions'; let progress = `
;`; let pollingInterval = 5000; let checkVaultId; function updateProgressBar(progressBar, cookingTask) { if (cookingTask.status === 'new') { progressBar.css('background-color', 'rgba(128, 128, 128, 0.5)'); } else if (cookingTask.status === 'pending') { progressBar.css('background-color', 'rgba(0, 0, 255, 0.5)'); } else if (cookingTask.status === 'done') { progressBar.css('background-color', '#5cb85c'); } else if (cookingTask.status === 'failed') { progressBar.css('background-color', 'rgba(255, 0, 0, 0.5)'); progressBar.css('background-image', 'none'); } progressBar.text(cookingTask.progress_message || cookingTask.status); if (cookingTask.status === 'new' || cookingTask.status === 'pending') { progressBar.addClass('progress-bar-animated'); } else { progressBar.removeClass('progress-bar-striped'); } } let recookTask; // called when the user wants to download a cooked archive export function fetchCookedObject(fetchUrl) { recookTask = null; // first, check if the link is still available from the vault fetch(fetchUrl) .then(response => { // link is still alive, proceed to download if (response.ok) { $('#vault-fetch-iframe').attr('src', fetchUrl); // link is dead } else { // get the associated cooking task let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks')); for (let i = 0; i < vaultCookingTasks.length; ++i) { if (vaultCookingTasks[i].fetch_url === fetchUrl) { recookTask = vaultCookingTasks[i]; break; } } // display a modal asking the user if he wants to recook the archive $('#vault-recook-object-modal').modal('show'); } }); } // called when the user wants to recook an archive // for which the download link is not available anymore export function recookObject() { if (recookTask) { - // stop cookink tasks status polling + // stop cooking tasks status polling clearTimeout(checkVaultId); // build cook request url let cookingUrl; if (recookTask.object_type === 'directory') { cookingUrl = Urls.api_vault_cook_directory(recookTask.object_id); } else { cookingUrl = Urls.api_vault_cook_revision_gitfast(recookTask.object_id); } if (recookTask.email) { cookingUrl += '?email=' + recookTask.email; } // request archive cooking csrfPost(cookingUrl) .then(handleFetchError) .then(() => { // update task status recookTask.status = 'new'; let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks')); for (let i = 0; i < vaultCookingTasks.length; ++i) { if (vaultCookingTasks[i].object_id === recookTask.object_id) { vaultCookingTasks[i] = recookTask; break; } } // save updated tasks to local storage localStorage.setItem('swh-vault-cooking-tasks', JSON.stringify(vaultCookingTasks)); // restart cooking tasks status polling checkVaultCookingTasks(); // hide recook archive modal $('#vault-recook-object-modal').modal('hide'); }) // something went wrong .catch(() => { checkVaultCookingTasks(); $('#vault-recook-object-modal').modal('hide'); }); } } function checkVaultCookingTasks() { let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks')); if (!vaultCookingTasks || vaultCookingTasks.length === 0) { $('.swh-vault-table tbody tr').remove(); checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval); return; } let cookingTaskRequests = []; let tasks = {}; let currentObjectIds = []; for (let i = 0; i < vaultCookingTasks.length; ++i) { let cookingTask = vaultCookingTasks[i]; currentObjectIds.push(cookingTask.object_id); tasks[cookingTask.object_id] = cookingTask; let cookingUrl; if (cookingTask.object_type === 'directory') { cookingUrl = Urls.api_vault_cook_directory(cookingTask.object_id); } else { cookingUrl = Urls.api_vault_cook_revision_gitfast(cookingTask.object_id); } if (cookingTask.status !== 'done' && cookingTask.status !== 'failed') { cookingTaskRequests.push(fetch(cookingUrl)); } } $('.swh-vault-table tbody tr').each((i, row) => { let objectId = $(row).find('.vault-object-id').data('object-id'); if ($.inArray(objectId, currentObjectIds) === -1) { $(row).remove(); } }); Promise.all(cookingTaskRequests) .then(handleFetchErrors) .then(responses => Promise.all(responses.map(r => r.json()))) .then(cookingTasks => { let table = $('#vault-cooking-tasks tbody'); for (let i = 0; i < cookingTasks.length; ++i) { let cookingTask = tasks[cookingTasks[i].obj_id]; cookingTask.status = cookingTasks[i].status; cookingTask.fetch_url = cookingTasks[i].fetch_url; cookingTask.progress_message = cookingTasks[i].progress_message; } for (let i = 0; i < vaultCookingTasks.length; ++i) { let cookingTask = vaultCookingTasks[i]; let rowTask = $('#vault-task-' + cookingTask.object_id); let downloadLinkWait = 'Waiting for download link to be available'; if (!rowTask.length) { let browseUrl; if (cookingTask.object_type === 'directory') { browseUrl = Urls.browse_directory(cookingTask.object_id); } else { browseUrl = Urls.browse_revision(cookingTask.object_id); } let progressBar = $.parseHTML(progress)[0]; let progressBarContent = $(progressBar).find('.progress-bar'); updateProgressBar(progressBarContent, cookingTask); let tableRow; if (cookingTask.object_type === 'directory') { tableRow = ``; } else { tableRow = ``; } tableRow += ''; tableRow += `${cookingTask.object_type}`; tableRow += `${cookingTask.object_id}`; tableRow += `${progressBar.outerHTML}`; let downloadLink = downloadLinkWait; if (cookingTask.status === 'done') { downloadLink = `'; } else if (cookingTask.status === 'failed') { downloadLink = ''; } tableRow += `${downloadLink}`; tableRow += ''; table.prepend(tableRow); } else { let progressBar = rowTask.find('.progress-bar'); updateProgressBar(progressBar, cookingTask); let downloadLink = rowTask.find('.vault-dl-link'); if (cookingTask.status === 'done') { downloadLink[0].innerHTML = `'; } else if (cookingTask.status === 'failed') { downloadLink[0].innerHTML = ''; } else if (cookingTask.status === 'new') { downloadLink[0].innerHTML = downloadLinkWait; } } } localStorage.setItem('swh-vault-cooking-tasks', JSON.stringify(vaultCookingTasks)); checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval); }) .catch(() => {}); } export function initUi() { $('#vault-tasks-toggle-selection').change(event => { $('.vault-task-toggle-selection').prop('checked', event.currentTarget.checked); }); $('#vault-remove-tasks').click(() => { clearTimeout(checkVaultId); let tasksToRemove = []; $('.swh-vault-table tbody tr').each((i, row) => { let taskSelected = $(row).find('.vault-task-toggle-selection').prop('checked'); if (taskSelected) { let objectId = $(row).find('.vault-object-id').data('object-id'); tasksToRemove.push(objectId); $(row).remove(); } }); let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks')); vaultCookingTasks = $.grep(vaultCookingTasks, task => { return $.inArray(task.object_id, tasksToRemove) === -1; }); localStorage.setItem('swh-vault-cooking-tasks', JSON.stringify(vaultCookingTasks)); $('#vault-tasks-toggle-selection').prop('checked', false); checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval); }); checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval); $(document).on('shown.bs.tab', 'a[data-toggle="tab"]', e => { if (e.currentTarget.text.trim() === 'Vault') { clearTimeout(checkVaultId); checkVaultCookingTasks(); } }); window.onfocus = () => { clearTimeout(checkVaultId); checkVaultCookingTasks(); }; } diff --git a/swh/web/assets/src/bundles/vendors/index.js b/swh/web/assets/src/bundles/vendors/index.js index 7eeb22ba..3ec7fa9e 100644 --- a/swh/web/assets/src/bundles/vendors/index.js +++ b/swh/web/assets/src/bundles/vendors/index.js @@ -1,40 +1,40 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ // vendors bundles centralizing assets used in all swh-web applications -// polyfills in order to use advanded js features (like Promise or fetch) +// polyfills in order to use advanced js features (like Promise or fetch) // in older browsers import '@babel/polyfill'; import 'whatwg-fetch/dist/fetch.umd'; import 'url-search-params-polyfill'; // jquery and bootstrap import 'jquery'; import 'bootstrap-loader/lib/bootstrap.loader?configFilePath=../../../swh/web/assets/config/.bootstraprc!bootstrap-loader/no-op.js'; // admin-lte scripts import 'admin-lte'; // js-cookie import 'js-cookie'; // jquery datatables import 'datatables.net'; import 'datatables.net-bs4'; import 'datatables.net-bs4/css/dataTables.bootstrap4.css'; import './datatables.css'; // iframe-resizer import 'iframe-resizer'; // web fonts import 'typeface-alegreya'; import 'typeface-alegreya-sans'; import 'font-awesome/css/font-awesome.css'; import 'octicons/build/font/octicons.css'; import 'open-iconic/font/css/open-iconic-bootstrap.css'; diff --git a/swh/web/assets/src/bundles/webapp/index.js b/swh/web/assets/src/bundles/webapp/index.js index 576ceb97..0e7af747 100644 --- a/swh/web/assets/src/bundles/webapp/index.js +++ b/swh/web/assets/src/bundles/webapp/index.js @@ -1,23 +1,23 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ // webapp entrypoint bundle centralizing global custom stylesheets // and utility js modules used in all swh-web applications -// explicitely import the vendors bundle +// explicitly import the vendors bundle import '../vendors'; // global swh-web custom stylesheets import './webapp.css'; import './breadcrumbs.css'; export * from './webapp-utils'; // utility js modules export * from './code-highlighting'; export * from './readme-rendering'; export * from './pdf-rendering'; diff --git a/swh/web/assets/src/bundles/webapp/pdf-rendering.js b/swh/web/assets/src/bundles/webapp/pdf-rendering.js index 0a3e82ce..9d10ea50 100644 --- a/swh/web/assets/src/bundles/webapp/pdf-rendering.js +++ b/swh/web/assets/src/bundles/webapp/pdf-rendering.js @@ -1,100 +1,100 @@ /** * Copyright (C) 2018 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ // adapted from pdf.js examples located at http://mozilla.github.io/pdf.js/examples/ import {staticAsset} from 'utils/functions'; export async function renderPdf(pdfUrl) { let pdfDoc = null; let pageNum = 1; let pageRendering = false; let pageNumPending = null; let scale = 1.5; let canvas = $('#pdf-canvas')[0]; let ctx = canvas.getContext('2d'); // Get page info from document, resize canvas accordingly, and render page. function renderPage(num) { pageRendering = true; // Using promise to fetch the page pdfDoc.getPage(num).then(page => { let viewport = page.getViewport(scale); canvas.width = viewport.width; canvas.height = viewport.height; // Render PDF page into canvas context let renderContext = { canvasContext: ctx, viewport: viewport }; let renderTask = page.render(renderContext); // Wait for rendering to finish renderTask.promise.then(() => { pageRendering = false; if (pageNumPending !== null) { // New page rendering is pending renderPage(pageNumPending); pageNumPending = null; } }); }); // Update page counters $('#pdf-page-num').text(num); } // If another page rendering in progress, waits until the rendering is - // finised. Otherwise, executes rendering immediately. + // finished. Otherwise, executes rendering immediately. function queueRenderPage(num) { if (pageRendering) { pageNumPending = num; } else { renderPage(num); } } // Displays previous page. function onPrevPage() { if (pageNum <= 1) { return; } pageNum--; queueRenderPage(pageNum); } // Displays next page. function onNextPage() { if (pageNum >= pdfDoc.numPages) { return; } pageNum++; queueRenderPage(pageNum); } let pdfjs = await import(/* webpackChunkName: "pdfjs" */ 'pdfjs-dist'); pdfjs.GlobalWorkerOptions.workerSrc = staticAsset('js/pdf.worker.min.js'); $(document).ready(() => { $('#pdf-prev').click(onPrevPage); $('#pdf-next').click(onNextPage); let loadingTask = pdfjs.getDocument(pdfUrl); loadingTask.promise.then(pdf => { pdfDoc = pdf; $('#pdf-page-count').text(pdfDoc.numPages); // Initial/first page rendering renderPage(pageNum); }, function(reason) { // PDF loading error console.error(reason); }); }); } diff --git a/swh/web/assets/src/bundles/webapp/webapp-utils.js b/swh/web/assets/src/bundles/webapp/webapp-utils.js index 40bcb44d..83271887 100644 --- a/swh/web/assets/src/bundles/webapp/webapp-utils.js +++ b/swh/web/assets/src/bundles/webapp/webapp-utils.js @@ -1,127 +1,127 @@ import objectFitImages from 'object-fit-images'; import {Layout} from 'admin-lte'; let collapseSidebar = false; let previousSidebarState = localStorage.getItem('swh-sidebar-collapsed'); if (previousSidebarState !== undefined) { collapseSidebar = JSON.parse(previousSidebarState); } // adapt implementation of fixLayoutHeight from admin-lte Layout.prototype.fixLayoutHeight = () => { let heights = { window: $(window).height(), header: $('.main-header').outerHeight(), footer: $('.footer').outerHeight(), sidebar: $('.main-sidebar').height(), topbar: $('.swh-top-bar').height() }; let offset = 10; $('.content-wrapper').css('min-height', heights.window - heights.topbar - heights.header - heights.footer - offset); $('.main-sidebar').css('min-height', heights.window - heights.topbar - heights.header - heights.footer - offset); }; $(document).on('DOMContentLoaded', () => { // restore previous sidebar state (collapsed/expanded) if (collapseSidebar) { - // hack to avoid animated transition for collasping sidebar + // hack to avoid animated transition for collapsing sidebar // when loading a page let sidebarTransition = $('.main-sidebar, .main-sidebar:before').css('transition'); let sidebarEltsTransition = $('.sidebar .nav-link p, .main-sidebar .brand-text, .sidebar .user-panel .info').css('transition'); $('.main-sidebar, .main-sidebar:before').css('transition', 'none'); $('.sidebar .nav-link p, .main-sidebar .brand-text, .sidebar .user-panel .info').css('transition', 'none'); $('body').addClass('sidebar-collapse'); $('.swh-words-logo-swh').css('visibility', 'visible'); // restore transitions for user navigation setTimeout(() => { $('.main-sidebar, .main-sidebar:before').css('transition', sidebarTransition); $('.sidebar .nav-link p, .main-sidebar .brand-text, .sidebar .user-panel .info').css('transition', sidebarEltsTransition); }); } }); $(document).on('collapsed.lte.pushmenu', event => { if ($('body').width() > 980) { $('.swh-words-logo-swh').css('visibility', 'visible'); } }); $(document).on('shown.lte.pushmenu', event => { $('.swh-words-logo-swh').css('visibility', 'hidden'); }); function ensureNoFooterOverflow() { $('body').css('padding-bottom', $('footer').outerHeight() + 'px'); } $(document).ready(() => { // redirect to last browse page if any when clicking on the 'Browse' entry // in the sidebar $(`.swh-browse-link`).click(event => { let lastBrowsePage = sessionStorage.getItem('last-browse-page'); if (lastBrowsePage) { event.preventDefault(); window.location = lastBrowsePage; } }); // ensure footer do not overflow main content for mobile devices // or after resizing the browser window ensureNoFooterOverflow(); $(window).resize(function() { ensureNoFooterOverflow(); if ($('body').hasClass('sidebar-collapse') && $('body').width() > 980) { $('.swh-words-logo-swh').css('visibility', 'visible'); } }); // activate css polyfill 'object-fit: contain' in old browsers objectFitImages(); }); export function initPage(page) { $(document).ready(() => { // set relevant sidebar link to page active $(`.swh-${page}-item`).addClass('active'); $(`.swh-${page}-link`).addClass('active'); // triggered when unloading the current page $(window).on('unload', () => { // backup sidebar state (collapsed/expanded) let sidebarCollapsed = $('body').hasClass('sidebar-collapse'); localStorage.setItem('swh-sidebar-collapsed', JSON.stringify(sidebarCollapsed)); // backup current browse page if (page === 'browse') { sessionStorage.setItem('last-browse-page', window.location); } }); }); } export function showModalMessage(title, message) { $('#swh-web-modal-message .modal-title').text(title); $('#swh-web-modal-message .modal-content p').text(message); $('#swh-web-modal-message').modal('show'); } export function showModalConfirm(title, message, callback) { $('#swh-web-modal-confirm .modal-title').text(title); $('#swh-web-modal-confirm .modal-content p').text(message); $('#swh-web-modal-confirm #swh-web-modal-confirm-ok-btn').bind('click', () => { callback(); $('#swh-web-modal-confirm').modal('hide'); $('#swh-web-modal-confirm #swh-web-modal-confirm-ok-btn').unbind('click'); }); $('#swh-web-modal-confirm').modal('show'); } let swhObjectIcons; export function setSwhObjectIcons(icons) { swhObjectIcons = icons; } export function getSwhObjectIcon(swhObjectType) { return swhObjectIcons[swhObjectType]; } diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py index 0844c0bd..0047d4c2 100644 --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -1,1197 +1,1197 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from collections import defaultdict import magic import math import pypandoc import stat from django.core.cache import cache from django.utils.safestring import mark_safe from importlib import reload from swh.model.identifiers import persistent_identifier from swh.web.common import highlightjs, service from swh.web.common.exc import NotFoundExc, http_status_code_message from swh.web.common.utils import ( reverse, format_utc_iso_date, parse_timestamp, get_origin_visits, get_swh_persistent_id, swh_object_icons ) from swh.web.config import get_config def get_directory_entries(sha1_git): """Function that retrieves the content of a SWH directory from the SWH archive. The directories entries are first sorted in lexicographical order. Sub-directories and regular files are then extracted. Args: sha1_git: sha1_git identifier of the directory Returns: A tuple whose first member corresponds to the sub-directories list and second member the regular files list Raises: NotFoundExc if the directory is not found """ cache_entry_id = 'directory_entries_%s' % sha1_git cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry entries = list(service.lookup_directory(sha1_git)) for e in entries: e['perms'] = stat.filemode(e['perms']) if e['type'] == 'rev': - # modify dir entry name to explicitely show it points + # modify dir entry name to explicitly show it points # to a revision e['name'] = '%s @ %s' % (e['name'], e['target'][:7]) dirs = [e for e in entries if e['type'] in ('dir', 'rev')] files = [e for e in entries if e['type'] == 'file'] dirs = sorted(dirs, key=lambda d: d['name']) files = sorted(files, key=lambda f: f['name']) cache.set(cache_entry_id, (dirs, files)) return dirs, files def get_mimetype_and_encoding_for_content(content): """Function that returns the mime type and the encoding associated to a content buffer using the magic module under the hood. Args: content (bytes): a content buffer Returns: A tuple (mimetype, encoding), for instance ('text/plain', 'us-ascii'), associated to the provided content. """ while True: try: magic_result = magic.detect_from_content(content) mime_type = magic_result.mime_type encoding = magic_result.encoding break except Exception: # workaround an issue with the magic module who can fail # if detect_from_content is called multiple times in # a short amount of time reload(magic) return mime_type, encoding # maximum authorized content size in bytes for HTML display # with code highlighting content_display_max_size = get_config()['content_display_max_size'] snapshot_content_max_size = get_config()['snapshot_content_max_size'] def request_content(query_string, max_size=content_display_max_size, raise_if_unavailable=True, reencode=True): """Function that retrieves a SWH content from the SWH archive. Raw bytes content is first retrieved, then the content mime type. If the mime type is not stored in the archive, it will be computed using Python magic module. Args: query_string: a string of the form "[ALGO_HASH:]HASH" where optional ALGO_HASH can be either *sha1*, *sha1_git*, *sha256*, or *blake2s256* (default to *sha1*) and HASH the hexadecimal representation of the hash value max_size: the maximum size for a content to retrieve (default to 1MB, no size limit if None) Returns: A tuple whose first member corresponds to the content raw bytes and second member the content mime type Raises: NotFoundExc if the content is not found """ content_data = service.lookup_content(query_string) filetype = None language = None license = None # requests to the indexer db may fail so properly handle # those cases in order to avoid content display errors try: filetype = service.lookup_content_filetype(query_string) language = service.lookup_content_language(query_string) license = service.lookup_content_license(query_string) except Exception: pass mimetype = 'unknown' encoding = 'unknown' if filetype: mimetype = filetype['mimetype'] encoding = filetype['encoding'] content_data['error_code'] = 200 content_data['error_message'] = '' content_data['error_description'] = '' if not max_size or content_data['length'] < max_size: try: content_raw = service.lookup_content_raw(query_string) except Exception as e: if raise_if_unavailable: raise e else: content_data['raw_data'] = None content_data['error_code'] = 404 content_data['error_description'] = \ 'The bytes of the content are currently not available in the archive.' # noqa content_data['error_message'] = \ http_status_code_message[content_data['error_code']] else: content_data['raw_data'] = content_raw['data'] if not filetype: mimetype, encoding = \ get_mimetype_and_encoding_for_content(content_data['raw_data']) # noqa # encode textual content to utf-8 if needed if reencode and mimetype.startswith('text/'): # probably a malformed UTF-8 content, re-encode it # by replacing invalid chars with a substitution one if encoding == 'unknown-8bit': content_data['raw_data'] = \ content_data['raw_data'].decode('utf-8', 'replace')\ .encode('utf-8') elif 'ascii' not in encoding and encoding not in ['utf-8', 'binary']: # noqa content_data['raw_data'] = \ content_data['raw_data'].decode(encoding, 'replace')\ .encode('utf-8') elif reencode and mimetype.startswith('application/octet-stream'): # file may detect a text content as binary # so try to decode it for display encodings = ['us-ascii'] encodings += ['iso-8859-%s' % i for i in range(1, 17)] for encoding in encodings: try: content_data['raw_data'] = \ content_data['raw_data'].decode(encoding)\ .encode('utf-8') except Exception: pass else: # ensure display in content view mimetype = 'text/plain' break else: content_data['raw_data'] = None content_data['mimetype'] = mimetype content_data['encoding'] = encoding if language: content_data['language'] = language['lang'] else: content_data['language'] = 'not detected' if license: content_data['licenses'] = ', '.join(license['facts'][0]['licenses']) else: content_data['licenses'] = 'not detected' return content_data _browsers_supported_image_mimes = set(['image/gif', 'image/png', 'image/jpeg', 'image/bmp', 'image/webp', 'image/svg', 'image/svg+xml']) def prepare_content_for_display(content_data, mime_type, path): """Function that prepares a content for HTML display. The function tries to associate a programming language to a content in order to perform syntax highlighting client-side using highlightjs. The language is determined using either the content filename or its mime type. If the mime type corresponds to an image format supported by web browsers, the content will be encoded in base64 for displaying the image. Args: content_data (bytes): raw bytes of the content mime_type (string): mime type of the content path (string): path of the content including filename Returns: A dict containing the content bytes (possibly different from the one provided as parameter if it is an image) under the key 'content_data and the corresponding highlightjs language class under the key 'language'. """ language = highlightjs.get_hljs_language_from_filename(path) if not language: language = highlightjs.get_hljs_language_from_mime_type(mime_type) if not language: language = 'nohighlight' elif mime_type.startswith('application/'): mime_type = mime_type.replace('application/', 'text/') if mime_type.startswith('image/'): if mime_type in _browsers_supported_image_mimes: content_data = base64.b64encode(content_data) else: content_data = None if mime_type.startswith('image/svg'): mime_type = 'image/svg+xml' return {'content_data': content_data, 'language': language, 'mimetype': mime_type} def get_origin_visit(origin_info, visit_ts=None, visit_id=None, snapshot_id=None): """Function that returns information about a SWH visit for a given origin. The visit is retrieved from a provided timestamp. The closest visit from that timestamp is selected. Args: origin_info (dict): a dict filled with origin information (id, url, type) visit_ts (int or str): an ISO date string or Unix timestamp to parse Returns: A dict containing the visit info as described below:: {'origin': 2, 'date': '2017-10-08T11:54:25.582463+00:00', 'metadata': {}, 'visit': 25, 'status': 'full'} """ visits = get_origin_visits(origin_info) if not visits: raise NotFoundExc('No SWH visit associated to origin with' ' type %s and url %s!' % (origin_info['type'], origin_info['url'])) if snapshot_id: visit = [v for v in visits if v['snapshot'] == snapshot_id] if len(visit) == 0: raise NotFoundExc( 'Visit for snapshot with id %s for origin with type %s' ' and url %s not found!' % (snapshot_id, origin_info['type'], origin_info['url'])) return visit[0] if visit_id: visit = [v for v in visits if v['visit'] == int(visit_id)] if len(visit) == 0: raise NotFoundExc( 'Visit with id %s for origin with type %s' ' and url %s not found!' % (visit_id, origin_info['type'], origin_info['url'])) return visit[0] if not visit_ts: # returns the latest full visit when no timestamp is provided for v in reversed(visits): if v['status'] == 'full': return v return visits[-1] parsed_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) visit_idx = None for i, visit in enumerate(visits): ts = math.floor(parse_timestamp(visit['date']).timestamp()) if i == 0 and parsed_visit_ts <= ts: return visit elif i == len(visits) - 1: if parsed_visit_ts >= ts: return visit else: next_ts = math.floor( parse_timestamp(visits[i+1]['date']).timestamp()) if parsed_visit_ts >= ts and parsed_visit_ts < next_ts: if (parsed_visit_ts - ts) < (next_ts - parsed_visit_ts): visit_idx = i break else: visit_idx = i+1 break if visit_idx is not None: visit = visits[visit_idx] while visit_idx < len(visits) - 1 and \ visit['date'] == visits[visit_idx+1]['date']: visit_idx = visit_idx + 1 visit = visits[visit_idx] return visit else: raise NotFoundExc( 'Visit with timestamp %s for origin with type %s and url %s not found!' % # noqa (visit_ts, origin_info['type'], origin_info['url'])) def process_snapshot_branches(snapshot_branches): """ Process a dictionary describing snapshot branches: extract those targeting revisions and releases, put them in two different lists, then sort those lists in lexicographical order of the branches' names. Args: snapshot_branches (dict): A dict describing the branches of a snapshot as returned for instance by :func:`swh.web.common.service.lookup_snapshot` Returns: tuple: A tuple whose first member is the sorted list of branches targeting revisions and second member the sorted list of branches targeting releases """ # noqa branches = {} releases = {} revision_to_branch = defaultdict(set) revision_to_release = defaultdict(set) release_to_branch = defaultdict(set) for branch_name, target in snapshot_branches.items(): if not target: # FIXME: display branches with an unknown target anyway continue target_id = target['target'] target_type = target['target_type'] if target_type == 'revision': branches[branch_name] = { 'name': branch_name, 'revision': target_id, } revision_to_branch[target_id].add(branch_name) elif target_type == 'release': release_to_branch[target_id].add(branch_name) # FIXME: handle pointers to other object types # FIXME: handle branch aliases releases_info = service.lookup_release_multiple( release_to_branch.keys() ) for release in releases_info: branches_to_update = release_to_branch[release['id']] for branch in branches_to_update: releases[branch] = { 'name': release['name'], 'branch_name': branch, 'date': format_utc_iso_date(release['date']), 'id': release['id'], 'message': release['message'], 'target_type': release['target_type'], 'target': release['target'], } if release['target_type'] == 'revision': revision_to_release[release['target']].update( branches_to_update ) revisions = service.lookup_revision_multiple( set(revision_to_branch.keys()) | set(revision_to_release.keys()) ) for revision in revisions: if not revision: continue revision_data = { 'directory': revision['directory'], 'date': format_utc_iso_date(revision['date']), 'message': revision['message'], } for branch in revision_to_branch[revision['id']]: branches[branch].update(revision_data) for release in revision_to_release[revision['id']]: releases[release]['directory'] = revision['directory'] ret_branches = list(sorted(branches.values(), key=lambda b: b['name'])) ret_releases = list(sorted(releases.values(), key=lambda b: b['name'])) return ret_branches, ret_releases def get_snapshot_content(snapshot_id): """Returns the lists of branches and releases associated to a swh snapshot. That list is put in cache in order to speedup the navigation in the swh-web/browse ui. .. warning:: At most 1000 branches contained in the snapshot will be returned for performance reasons. Args: snapshot_id (str): hexadecimal representation of the snapshot identifier Returns: A tuple with two members. The first one is a list of dict describing the snapshot branches. The second one is a list of dict describing the snapshot releases. Raises: NotFoundExc if the snapshot does not exist """ cache_entry_id = 'swh_snapshot_%s' % snapshot_id cache_entry = cache.get(cache_entry_id) if cache_entry: return cache_entry['branches'], cache_entry['releases'] branches = [] releases = [] if snapshot_id: snapshot = service.lookup_snapshot( snapshot_id, branches_count=snapshot_content_max_size) branches, releases = process_snapshot_branches(snapshot['branches']) cache.set(cache_entry_id, { 'branches': branches, 'releases': releases, }) return branches, releases def get_origin_visit_snapshot(origin_info, visit_ts=None, visit_id=None, snapshot_id=None): """Returns the lists of branches and releases associated to a swh origin for a given visit. The visit is expressed by a timestamp. In the latter case, the closest visit from the provided timestamp will be used. If no visit parameter is provided, it returns the list of branches found for the latest visit. That list is put in cache in order to speedup the navigation in the swh-web/browse ui. .. warning:: At most 1000 branches contained in the snapshot will be returned for performance reasons. Args: origin_info (dict): a dict filled with origin information (id, url, type) visit_ts (int or str): an ISO date string or Unix timestamp to parse - visit_id (int): optional visit id for desambiguation in case + visit_id (int): optional visit id for disambiguation in case several visits have the same timestamp Returns: A tuple with two members. The first one is a list of dict describing the origin branches for the given visit. The second one is a list of dict describing the origin releases for the given visit. Raises: NotFoundExc if the origin or its visit are not found """ visit_info = get_origin_visit(origin_info, visit_ts, visit_id, snapshot_id) return get_snapshot_content(visit_info['snapshot']) def gen_link(url, link_text=None, link_attrs={}): """ Utility function for generating an HTML link to insert in Django templates. Args: url (str): an url link_text (str): optional text for the produced link, if not provided the url will be used link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'link_text' """ attrs = ' ' for k, v in link_attrs.items(): attrs += '%s="%s" ' % (k, v) if not link_text: link_text = url link = '%s' % (attrs, url, link_text) return mark_safe(link) def gen_person_link(person_id, person_name, snapshot_context=None, link_attrs={}): """ Utility function for generating a link to a SWH person HTML view to insert in Django templates. Args: person_id (int): a SWH person id person_name (str): the associated person name link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'person_name' """ query_params = None if snapshot_context and snapshot_context['origin_info']: origin_info = snapshot_context['origin_info'] query_params = {'origin_type': origin_info['type'], 'origin': origin_info['url']} if 'timestamp' in snapshot_context['url_args']: query_params['timestamp'] = \ snapshot_context['url_args']['timestamp'] if 'visit_id' in snapshot_context['query_params']: query_params['visit_id'] = \ snapshot_context['query_params']['visit_id'] elif snapshot_context: query_params = {'snapshot_id': snapshot_context['snapshot_id']} person_url = reverse('browse-person', url_args={'person_id': person_id}, query_params=query_params) return gen_link(person_url, person_name or 'None', link_attrs) def gen_revision_url(revision_id, snapshot_context=None): """ Utility function for generating an url to a SWH revision. Args: revision_id (str): a SWH revision id snapshot_context (dict): if provided, generate snapshot-dependent browsing url Returns: str: The url to browse the revision """ query_params = None if snapshot_context and snapshot_context['origin_info']: origin_info = snapshot_context['origin_info'] origin_type = snapshot_context['origin_type'] query_params = {'origin_type': origin_type, 'origin': origin_info['url']} if 'timestamp' in snapshot_context['url_args']: query_params['timestamp'] = \ snapshot_context['url_args']['timestamp'] if 'visit_id' in snapshot_context['query_params']: query_params['visit_id'] = \ snapshot_context['query_params']['visit_id'] elif snapshot_context: query_params = {'snapshot_id': snapshot_context['snapshot_id']} return reverse('browse-revision', url_args={'sha1_git': revision_id}, query_params=query_params) def gen_revision_link(revision_id, shorten_id=False, snapshot_context=None, link_text=None, link_attrs={}): """ Utility function for generating a link to a SWH revision HTML view to insert in Django templates. Args: revision_id (str): a SWH revision id shorten_id (boolean): whether to shorten the revision id to 7 characters for the link text snapshot_context (dict): if provided, generate snapshot-dependent browsing link link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: str: An HTML link in the form 'revision_id' """ if not revision_id: return None revision_url = gen_revision_url(revision_id, snapshot_context) if shorten_id: return gen_link(revision_url, revision_id[:7], link_attrs) else: if not link_text: link_text = revision_id return gen_link(revision_url, link_text, link_attrs) def gen_origin_link(origin_info, link_attrs={}): """ Utility function for generating a link to a SWH origin HTML view to insert in Django templates. Args: - origin_info (dict): a dicted filled with origin information + origin_info (dict): a dict filled with origin information (id, type, url) link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'Origin: origin_url' """ # noqa origin_browse_url = reverse('browse-origin', url_args={'origin_type': origin_info['type'], 'origin_url': origin_info['url']}) return gen_link(origin_browse_url, 'Origin: ' + origin_info['url'], link_attrs) def gen_directory_link(sha1_git, link_text=None, link_attrs={}): """ Utility function for generating a link to a SWH directory HTML view to insert in Django templates. Args: sha1_git (str): directory identifier link_text (str): optional text for the generated link (the generated url will be used by default) link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'link_text' """ if not sha1_git: return None directory_url = reverse('browse-directory', url_args={'sha1_git': sha1_git}) if not link_text: link_text = directory_url return gen_link(directory_url, link_text, link_attrs) def gen_snapshot_link(snapshot_id, link_text=None, link_attrs={}): """ Utility function for generating a link to a SWH snapshot HTML view to insert in Django templates. Args: snapshot_id (str): snapshot identifier link_text (str): optional text for the generated link (the generated url will be used by default) link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'link_text' """ snapshot_url = reverse('browse-snapshot', url_args={'snapshot_id': snapshot_id}) if not link_text: link_text = snapshot_url return gen_link(snapshot_url, link_text, link_attrs) def gen_snapshot_directory_link(snapshot_context, revision_id=None, link_text=None, link_attrs={}): """ Utility function for generating a link to a SWH directory HTML view in the context of a snapshot to insert in Django templates. Args: snapshot_context (dict): the snapshot information revision_id (str): optional revision identifier in order to use the associated directory link_text (str): optional text to use for the generated link link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'origin_directory_view_url' """ query_params = {'revision': revision_id} if snapshot_context['origin_info']: origin_info = snapshot_context['origin_info'] url_args = {'origin_type': origin_info['type'], 'origin_url': origin_info['url']} if 'timestamp' in snapshot_context['url_args']: url_args['timestamp'] = \ snapshot_context['url_args']['timestamp'] if 'visit_id' in snapshot_context['query_params']: query_params['visit_id'] = \ snapshot_context['query_params']['visit_id'] directory_url = reverse('browse-origin-directory', url_args=url_args, query_params=query_params) else: url_args = {'snapshot_id': snapshot_context['snapshot_id']} directory_url = reverse('browse-snapshot-directory', url_args=url_args, query_params=query_params) if not link_text: link_text = directory_url return gen_link(directory_url, link_text, link_attrs) def gen_content_link(sha1_git, link_text=None, link_attrs={}): """ Utility function for generating a link to a SWH content HTML view to insert in Django templates. Args: sha1_git (str): content identifier link_text (str): optional text for the generated link (the generated url will be used by default) link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'link_text' """ if not sha1_git: return None content_url = reverse('browse-content', url_args={'query_string': 'sha1_git:' + sha1_git}) if not link_text: link_text = content_url return gen_link(content_url, link_text, link_attrs) def get_revision_log_url(revision_id, snapshot_context=None): """ Utility function for getting the URL for a SWH revision log HTML view (possibly in the context of an origin). Args: revision_id (str): revision identifier the history heads to snapshot_context (dict): if provided, generate snapshot-dependent browsing link Returns: The SWH revision log view URL """ query_params = {'revision': revision_id} if snapshot_context and snapshot_context['origin_info']: origin_info = snapshot_context['origin_info'] url_args = {'origin_type': origin_info['type'], 'origin_url': origin_info['url']} if 'timestamp' in snapshot_context['url_args']: url_args['timestamp'] = \ snapshot_context['url_args']['timestamp'] if 'visit_id' in snapshot_context['query_params']: query_params['visit_id'] = \ snapshot_context['query_params']['visit_id'] revision_log_url = reverse('browse-origin-log', url_args=url_args, query_params=query_params) elif snapshot_context: url_args = {'snapshot_id': snapshot_context['snapshot_id']} revision_log_url = reverse('browse-snapshot-log', url_args=url_args, query_params=query_params) else: revision_log_url = reverse('browse-revision-log', url_args={'sha1_git': revision_id}) return revision_log_url def gen_revision_log_link(revision_id, snapshot_context=None, link_text=None, link_attrs={}): """ Utility function for generating a link to a SWH revision log HTML view (possibly in the context of an origin) to insert in Django templates. Args: revision_id (str): revision identifier the history heads to snapshot_context (dict): if provided, generate snapshot-dependent browsing link link_text (str): optional text to use for the generated link link_attrs (dict): optional attributes (e.g. class) to add to the link Returns: An HTML link in the form 'link_text' """ if not revision_id: return None revision_log_url = get_revision_log_url(revision_id, snapshot_context) if not link_text: link_text = revision_log_url return gen_link(revision_log_url, link_text, link_attrs) def _format_log_entries(revision_log, per_page, snapshot_context=None): revision_log_data = [] for i, log in enumerate(revision_log): if i == per_page: break author_name = 'None' author_link = 'None' if log['author']: author_name = log['author']['name'] or log['author']['fullname'] author_link = gen_person_link(log['author']['id'], author_name, snapshot_context) revision_log_data.append( {'author': author_link, 'revision': gen_revision_link(log['id'], True, snapshot_context), 'message': log['message'], 'date': format_utc_iso_date(log['date']), 'directory': log['directory']}) return revision_log_data def prepare_revision_log_for_display(revision_log, per_page, revs_breadcrumb, snapshot_context=None): """ Utility functions that process raw revision log data for HTML display. Its purpose is to: * add links to relevant SWH browse views * format date in human readable format * truncate the message log It also computes the data needed to generate the links for navigating back and forth in the history log. Args: revision_log (list): raw revision log as returned by the SWH web api per_page (int): number of log entries per page revs_breadcrumb (str): breadcrumbs of revisions navigated so far, in the form 'rev1[/rev2/../revN]'. Each revision corresponds to the first one displayed in the HTML view for history log. snapshot_context (dict): if provided, generate snapshot-dependent browsing link """ current_rev = revision_log[0]['id'] next_rev = None prev_rev = None next_revs_breadcrumb = None prev_revs_breadcrumb = None if len(revision_log) == per_page + 1: prev_rev = revision_log[-1]['id'] prev_rev_bc = current_rev if snapshot_context: prev_rev_bc = prev_rev if revs_breadcrumb: revs = revs_breadcrumb.split('/') next_rev = revs[-1] if len(revs) > 1: next_revs_breadcrumb = '/'.join(revs[:-1]) if len(revision_log) == per_page + 1: prev_revs_breadcrumb = revs_breadcrumb + '/' + prev_rev_bc else: prev_revs_breadcrumb = prev_rev_bc return {'revision_log_data': _format_log_entries(revision_log, per_page, snapshot_context), 'prev_rev': prev_rev, 'prev_revs_breadcrumb': prev_revs_breadcrumb, 'next_rev': next_rev, 'next_revs_breadcrumb': next_revs_breadcrumb} # list of origin types that can be found in the swh archive # TODO: retrieve it dynamically in an efficient way instead # of hardcoding it _swh_origin_types = ['git', 'svn', 'deb', 'hg', 'ftp', 'deposit', 'pypi'] def get_origin_info(origin_url, origin_type=None): """ Get info about a SWH origin. Its main purpose is to automatically find an origin type when it is not provided as parameter. Args: origin_url (str): complete url of a SWH origin origin_type (str): optional origin type Returns: A dict with the following entries: * type: the origin type * url: the origin url * id: the SWH internal id of the origin """ if origin_type: return service.lookup_origin({'type': origin_type, 'url': origin_url}) else: for origin_type in _swh_origin_types: try: origin_info = service.lookup_origin({'type': origin_type, 'url': origin_url}) return origin_info except Exception: pass raise NotFoundExc('Origin with url %s not found!' % origin_url) def get_snapshot_context(snapshot_id=None, origin_type=None, origin_url=None, timestamp=None, visit_id=None): """ Utility function to compute relevant information when navigating the SWH archive in a snapshot context. The snapshot is either referenced by its id or it will be retrieved from an origin visit. Args: snapshot_id (str): hexadecimal representation of a snapshot identifier, all other parameters will be ignored if it is provided origin_type (str): the origin type (git, svn, deposit, ...) origin_url (str): the origin_url (e.g. https://github.com/(user)/(repo)/) timestamp (str): a datetime string for retrieving the closest SWH visit of the origin visit_id (int): optional visit id for disambiguation in case of several visits with the same timestamp Returns: A dict with the following entries: * origin_info: dict containing origin information * visit_info: dict containing SWH visit information * branches: the list of branches for the origin found during the visit * releases: the list of releases for the origin found during the visit * origin_browse_url: the url to browse the origin * origin_branches_url: the url to browse the origin branches * origin_releases_url': the url to browse the origin releases * origin_visit_url: the url to browse the snapshot of the origin found during the visit * url_args: dict containing url arguments to use when browsing in the context of the origin and its visit Raises: NotFoundExc: if no snapshot is found for the visit of an origin. """ # noqa origin_info = None visit_info = None url_args = None query_params = {} branches = [] releases = [] browse_url = None visit_url = None branches_url = None releases_url = None swh_type = 'snapshot' if origin_url: swh_type = 'origin' origin_info = get_origin_info(origin_url, origin_type) visit_info = get_origin_visit(origin_info, timestamp, visit_id, snapshot_id) fmt_date = format_utc_iso_date(visit_info['date']) visit_info['fmt_date'] = fmt_date snapshot_id = visit_info['snapshot'] if not snapshot_id: raise NotFoundExc('No snapshot associated to the visit of origin ' '%s on %s' % (origin_url, fmt_date)) # provided timestamp is not necessarily equals to the one # of the retrieved visit, so get the exact one in order # use it in the urls generated below if timestamp: timestamp = visit_info['date'] branches, releases = \ get_origin_visit_snapshot(origin_info, timestamp, visit_id, snapshot_id) url_args = {'origin_type': origin_type, 'origin_url': origin_info['url']} query_params = {'visit_id': visit_id} browse_url = reverse('browse-origin-visits', url_args=url_args) if timestamp: url_args['timestamp'] = format_utc_iso_date(timestamp, '%Y-%m-%dT%H:%M:%S') visit_url = reverse('browse-origin-directory', url_args=url_args, query_params=query_params) visit_info['url'] = visit_url branches_url = reverse('browse-origin-branches', url_args=url_args, query_params=query_params) releases_url = reverse('browse-origin-releases', url_args=url_args, query_params=query_params) elif snapshot_id: branches, releases = get_snapshot_content(snapshot_id) url_args = {'snapshot_id': snapshot_id} browse_url = reverse('browse-snapshot', url_args=url_args) branches_url = reverse('browse-snapshot-branches', url_args=url_args) releases_url = reverse('browse-snapshot-releases', url_args=url_args) releases = list(reversed(releases)) snapshot_size = service.lookup_snapshot_size(snapshot_id) is_empty = sum(snapshot_size.values()) == 0 swh_snp_id = persistent_identifier('snapshot', snapshot_id) return { 'swh_type': swh_type, 'swh_object_id': swh_snp_id, 'snapshot_id': snapshot_id, 'snapshot_size': snapshot_size, 'is_empty': is_empty, 'origin_info': origin_info, # keep track if the origin type was provided as url argument 'origin_type': origin_type, 'visit_info': visit_info, 'branches': branches, 'releases': releases, 'branch': None, 'release': None, 'browse_url': browse_url, 'branches_url': branches_url, 'releases_url': releases_url, 'url_args': url_args, 'query_params': query_params } # list of common readme names ordered by preference # (lower indices have higher priority) _common_readme_names = [ "readme.markdown", "readme.md", "readme.rst", "readme.txt", "readme" ] def get_readme_to_display(readmes): """ Process a list of readme files found in a directory in order to find the adequate one to display. Args: readmes: a list of dict where keys are readme file names and values are readme sha1s Returns: A tuple (readme_name, readme_sha1) """ readme_name = None readme_url = None readme_sha1 = None readme_html = None lc_readmes = {k.lower(): {'orig_name': k, 'sha1': v} for k, v in readmes.items()} # look for readme names according to the preference order # defined by the _common_readme_names list for common_readme_name in _common_readme_names: if common_readme_name in lc_readmes: readme_name = lc_readmes[common_readme_name]['orig_name'] readme_sha1 = lc_readmes[common_readme_name]['sha1'] readme_url = reverse('browse-content-raw', url_args={'query_string': readme_sha1}) break # otherwise pick the first readme like file if any if not readme_name and len(readmes.items()) > 0: readme_name = next(iter(readmes)) readme_sha1 = readmes[readme_name] readme_url = reverse('browse-content-raw', url_args={'query_string': readme_sha1}) # convert rst README to html server side as there is # no viable solution to perform that task client side if readme_name and readme_name.endswith('.rst'): cache_entry_id = 'readme_%s' % readme_sha1 cache_entry = cache.get(cache_entry_id) if cache_entry: readme_html = cache_entry else: try: rst_doc = request_content(readme_sha1) readme_html = pypandoc.convert_text(rst_doc['raw_data'], 'html', format='rst') cache.set(cache_entry_id, readme_html) except Exception: readme_html = 'Readme bytes are not available' return readme_name, readme_url, readme_html def get_swh_persistent_ids(swh_objects, snapshot_context=None): """ Returns a list of dict containing info related to persistent identifiers of swh objects. Args: swh_objects (list): a list of dict with the following keys: * type: swh object type (content/directory/release/revision/snapshot) * id: swh object id snapshot_context (dict): optional parameter describing the snapshot in which the object has been found Returns: list: a list of dict with the following keys: * object_type: the swh object type (content/directory/release/revision/snapshot) * object_icon: the swh object icon to use in HTML views * swh_id: the computed swh object persistent identifier * swh_id_url: the url resolving the persistent identifier * show_options: boolean indicating if the persistent id options must be displayed in persistent ids HTML view """ # noqa swh_ids = [] for swh_object in swh_objects: if not swh_object['id']: continue swh_id = get_swh_persistent_id(swh_object['type'], swh_object['id']) show_options = swh_object['type'] == 'content' or \ (snapshot_context and snapshot_context['origin_info'] is not None) object_icon = swh_object_icons[swh_object['type']] swh_ids.append({ 'object_type': swh_object['type'], 'object_icon': object_icon, 'swh_id': swh_id, 'swh_id_url': reverse('browse-swh-id', url_args={'swh_id': swh_id}), 'show_options': show_options }) return swh_ids diff --git a/swh/web/common/highlightjs.py b/swh/web/common/highlightjs.py index 774b5e2f..e9debf97 100644 --- a/swh/web/common/highlightjs.py +++ b/swh/web/common/highlightjs.py @@ -1,298 +1,298 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from pygments.lexers import ( get_all_lexers, get_lexer_for_filename ) # set of languages ids that can be highlighted # by highlight.js library _hljs_languages = set([ '1c', 'abnf', 'accesslog', 'actionscript', 'ada', 'angelscript', 'apache', 'applescript', 'arcade', 'arduino', 'armasm', 'asciidoc', 'aspectj', 'autohotkey', 'autoit', 'avrasm', 'awk', 'axapta', 'bash', 'basic', 'bnf', 'brainfuck', 'cal', 'capnproto', 'ceylon', 'clean', 'clojure', 'clojure-repl', 'cmake', 'coffeescript', 'coq', 'cos', 'cpp', 'crmsh', 'crystal', 'cs', 'csp', 'css', 'd', 'dart', 'delphi', 'diff', 'django', 'dns', 'dockerfile', 'dos', 'dsconfig', 'dts', 'dust', 'ebnf', 'elixir', 'elm', 'erb', 'erlang', 'erlang-repl', 'excel', 'fix', 'flix', 'fortran', 'fsharp', 'gams', 'gauss', 'gcode', 'gherkin', 'glsl', 'gml', 'go', 'golo', 'gradle', 'groovy', 'haml', 'handlebars', 'haskell', 'haxe', 'hsp', 'htmlbars', 'http', 'hy', 'inform7', 'ini', 'irpf90', 'isbl', 'java', 'javascript', 'jboss-cli', 'json', 'julia', 'julia-repl', 'kotlin', 'lasso', 'ldif', 'leaf', 'less', 'lisp', 'livecodeserver', 'livescript', 'llvm', 'lsl', 'lua', 'makefile', 'markdown', 'mathematica', 'matlab', 'maxima', 'mel', 'mercury', 'mipsasm', 'mizar', 'mojolicious', 'monkey', 'moonscript', 'n1ql', 'nginx', 'nimrod', 'nix', 'nsis', 'objectivec', 'ocaml', 'openscad', 'oxygene', 'parser3', 'perl', 'pf', 'pgsql', 'php', 'plaintext', 'pony', 'powershell', 'processing', 'profile', 'prolog', 'properties', 'protobuf', 'puppet', 'purebasic', 'python', 'q', 'qml', 'r', 'reasonml', 'rib', 'roboconf', 'routeros', 'rsl', 'ruby', 'ruleslanguage', 'rust', 'sas', 'scala', 'scheme', 'scilab', 'scss', 'shell', 'smali', 'smalltalk', 'sml', 'sqf', 'sql', 'stan', 'stata', 'step21', 'stylus', 'subunit', 'swift', 'taggerscript', 'tap', 'tcl', 'tex', 'thrift', 'tp', 'twig', 'typescript', 'vala', 'vbnet', 'vbscript', 'vbscript-html', 'verilog', 'vhdl', 'vim', 'x86asm', 'xl', 'xml', 'xquery', 'yaml', 'zephir' ]) # languages aliases defined in highlight.js _hljs_languages_aliases = { 'ado': 'stata', 'adoc': 'asciidoc', 'ahk': 'autohotkey', 'apacheconf': 'apache', 'arm': 'armasm', 'as': 'actionscript', 'asc': 'angelscript', 'atom': 'xml', 'bat': 'dos', 'bf': 'brainfuck', 'bind': 'dns', 'c': 'cpp', 'c++': 'cpp', 'capnp': 'capnproto', 'cc': 'cpp', 'clj': 'clojure', 'cls': 'cos', 'cmake.in': 'cmake', 'cmd': 'dos', 'coffee': 'coffeescript', 'console': 'shell', 'cr': 'crystal', 'craftcms': 'twig', 'crm': 'crmsh', 'csharp': 'cs', 'cson': 'coffeescript', 'dcl': 'clean', 'dfm': 'delphi', 'do': 'stata', 'docker': 'dockerfile', 'dpr': 'delphi', 'dst': 'dust', 'erl': 'erlang', 'f90': 'fortran', 'f95': 'fortran', 'feature': 'gherkin', 'freepascal': 'delphi', 'fs': 'fsharp', 'gemspec': 'ruby', 'GML': 'gml', 'gms': 'gams', 'golang': 'go', 'graph': 'roboconf', 'gss': 'gauss', 'gyp': 'python', 'h': 'cpp', 'h++': 'cpp', 'hbs': 'handlebars', 'hpp': 'cpp', 'hs': 'haskell', 'html': 'xml', 'html.handlebars': 'handlebars', 'html.hbs': 'handlebars', 'https': 'http', 'hx': 'haxe', 'hylang': 'hy', 'i7': 'inform7', 'iced': 'coffeescript', 'icl': 'clean', 'instances': 'roboconf', 'ipynb': 'json', 'irb': 'ruby', 'jinja': 'django', 'js': 'javascript', 'jsp': 'java', 'jsx': 'javascript', 'k': 'q', 'kdb': 'q', 'kt': 'kotlin', 'lassoscript': 'lasso', 'lazarus': 'delphi', 'lfm': 'delphi', 'lpr': 'delphi', 'ls': 'livescript', 'm': 'mercury', 'mak': 'makefile', 'md': 'markdown', 'mikrotik': 'routeros', 'mips': 'mipsasm', 'mk': 'makefile', 'mkd': 'markdown', 'mkdown': 'markdown', 'ml': 'ocaml', 'mm': 'objectivec', 'mma': 'mathematica', 'moo': 'mercury', 'moon': 'moonscript', 'nc': 'gcode', 'nginxconf': 'nginx', 'nim': 'nimrod', 'nixos': 'nix', 'obj-c': 'objectivec', 'objc': 'objectivec', 'osascript': 'applescript', 'p21': 'step21', 'pas': 'delphi', 'pascal': 'delphi', 'patch': 'diff', 'pb': 'purebasic', 'pbi': 'purebasic', 'pcmk': 'crmsh', 'pf.conf': 'pf', 'php3': 'php', 'php4': 'php', 'php5': 'php', 'php6': 'php', 'php7': 'php', 'pl': 'perl', 'plist': 'xml', 'pm': 'perl', 'podspec': 'ruby', 'postgres': 'pgsql', 'postgresql': 'pgsql', 'pp': 'puppet', 'ps': 'powershell', 'py': 'python', 'qt': 'qml', 'rb': 'ruby', 're': 'reasonml', 'rs': 'rust', 'rss': 'xml', 'rst': 'nohighlight', 'SAS': 'sas', 'scad': 'openscad', 'sci': 'scilab', 'sh': 'bash', 'st': 'smalltalk', 'step': 'step21', 'stp': 'step21', 'styl': 'stylus', 'sv': 'verilog', 'svh': 'verilog', 'tao': 'xl', 'thor': 'ruby', 'tk': 'tcl', 'toml': 'ini', 'ts': 'typescript', 'txt': 'nohighlight', 'v': 'verilog', 'vb': 'vbnet', 'vbs': 'vbscript', 'wildfly-cli': 'jboss-cli', 'xhtml': 'xml', 'xjb': 'xml', 'xls': 'excel', 'xlsx': 'excel', 'xpath': 'xquery', 'xq': 'xquery', 'xsd': 'xml', 'xsl': 'xml', 'YAML': 'yaml', 'yml': 'yaml', 'zep': 'zephir', 'zone': 'dns', 'zsh': 'bash' } # dictionary mapping pygment lexers to hljs languages _pygments_lexer_to_hljs_language = {} # dictionary mapping mime types to hljs languages _mime_type_to_hljs_language = { 'text/x-c': 'cpp', 'text/x-c++': 'cpp', 'text/x-msdos-batch': 'dos', 'text/x-lisp': 'lisp', 'text/x-shellscript': 'bash', } # dictionary mapping filenames to hljs languages _filename_to_hljs_language = { 'cmakelists.txt': 'cmake' } -# function to fill the above dictionnaries +# function to fill the above dictionaries def _init_pygments_to_hljs_map(): if len(_pygments_lexer_to_hljs_language) == 0: for lexer in get_all_lexers(): lexer_name = lexer[0] lang_aliases = lexer[1] lang_mime_types = lexer[3] lang = None for lang_alias in lang_aliases: if lang_alias in _hljs_languages: lang = lang_alias _pygments_lexer_to_hljs_language[lexer_name] = lang_alias break if lang: for lang_mime_type in lang_mime_types: _mime_type_to_hljs_language[lang_mime_type] = lang def get_hljs_language_from_filename(filename): """Function that tries to associate a language supported by highlight.js from a filename. Args: filename: input filename Returns: highlight.js language id or None if no correspondence has been found """ _init_pygments_to_hljs_map() if filename: filename_lower = filename.lower() if filename_lower in _filename_to_hljs_language: return _filename_to_hljs_language[filename_lower] exts = filename_lower.split('.') # check if file extension matches an hljs language # also handle .ext.in cases for ext in reversed(exts[-2:]): if ext in _hljs_languages: return ext if ext in _hljs_languages_aliases: return _hljs_languages_aliases[ext] # otherwise use Pygments language database lexer = None # try to find a Pygment lexer try: lexer = get_lexer_for_filename(filename) except Exception: pass # if there is a correspondence between the lexer and an hljs # language, return it if lexer and lexer.name in _pygments_lexer_to_hljs_language: return _pygments_lexer_to_hljs_language[lexer.name] # otherwise, try to find a match between the file extensions # associated to the lexer and the hljs language aliases if lexer: exts = [ext.replace('*.', '') for ext in lexer.filenames] for ext in exts: if ext in _hljs_languages_aliases: return _hljs_languages_aliases[ext] return None def get_hljs_language_from_mime_type(mime_type): """Function that tries to associate a language supported by highlight.js from a mime type. Args: mime_type: input mime type Returns: highlight.js language id or None if no correspondence has been found """ _init_pygments_to_hljs_map() if mime_type and mime_type in _mime_type_to_hljs_language: return _mime_type_to_hljs_language[mime_type] return None diff --git a/swh/web/common/service.py b/swh/web/common/service.py index eaf8d1e4..9ec735ee 100644 --- a/swh/web/common/service.py +++ b/swh/web/common/service.py @@ -1,983 +1,983 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os from collections import defaultdict from swh.model import hashutil from swh.web.common import converters from swh.web.common import query from swh.web.common.exc import NotFoundExc from swh.web import config storage = config.storage() vault = config.vault() idx_storage = config.indexer_storage() MAX_LIMIT = 50 # Top limit the users can ask for def _first_element(l): """Returns the first element in the provided list or None if it is empty or None""" return next(iter(l or []), None) def lookup_multiple_hashes(hashes): """Lookup the passed hashes in a single DB connection, using batch processing. Args: An array of {filename: X, sha1: Y}, string X, hex sha1 string Y. Returns: The same array with elements updated with elem['found'] = true if the hash is present in storage, elem['found'] = false if not. """ hashlist = [hashutil.hash_to_bytes(elem['sha1']) for elem in hashes] content_missing = storage.content_missing_per_sha1(hashlist) missing = [hashutil.hash_to_hex(x) for x in content_missing] for x in hashes: x.update({'found': True}) for h in hashes: if h['sha1'] in missing: h['found'] = False return hashes def lookup_expression(expression, last_sha1, per_page): """Lookup expression in raw content. Args: expression (str): An expression to lookup through raw indexed content last_sha1 (str): Last sha1 seen per_page (int): Number of results per page Returns: List of ctags whose content match the expression """ limit = min(per_page, MAX_LIMIT) ctags = idx_storage.content_ctags_search(expression, last_sha1=last_sha1, limit=limit) for ctag in ctags: ctag = converters.from_swh(ctag, hashess={'id'}) ctag['sha1'] = ctag['id'] ctag.pop('id') yield ctag def lookup_hash(q): """Checks if the storage contains a given content checksum Args: query string of the form Returns: Dict with key found containing the hash info if the hash is present, None if not. """ algo, hash = query.parse_hash(q) found = storage.content_find({algo: hash}) return {'found': found, 'algo': algo} def search_hash(q): """Checks if the storage contains a given content checksum Args: query string of the form Returns: Dict with key found to True or False, according to whether the checksum is present or not """ algo, hash = query.parse_hash(q) found = storage.content_find({algo: hash}) return {'found': found is not None} def lookup_content_provenance(q): """Return provenance information from a specified content. Args: q: query string of the form Yields: provenance information (dict) list if the content is found. """ algo, hash = query.parse_hash(q) provenances = storage.content_find_provenance({algo: hash}) if not provenances: return None return (converters.from_provenance(p) for p in provenances) def _lookup_content_sha1(q): """Given a possible input, query for the content's sha1. Args: q: query string of the form Returns: binary sha1 if found or None """ algo, hash = query.parse_hash(q) if algo != 'sha1': hashes = storage.content_find({algo: hash}) if not hashes: return None return hashes['sha1'] return hash def lookup_content_ctags(q): """Return ctags information from a specified content. Args: q: query string of the form Yields: ctags information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None ctags = list(idx_storage.content_ctags_get([sha1])) if not ctags: return None for ctag in ctags: yield converters.from_swh(ctag, hashess={'id'}) def lookup_content_filetype(q): """Return filetype information from a specified content. Args: q: query string of the form Yields: filetype information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None filetype = _first_element(list(idx_storage.content_mimetype_get([sha1]))) if not filetype: return None return converters.from_filetype(filetype) def lookup_content_language(q): """Return language information from a specified content. Args: q: query string of the form Yields: language information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None lang = _first_element(list(idx_storage.content_language_get([sha1]))) if not lang: return None return converters.from_swh(lang, hashess={'id'}) def lookup_content_license(q): """Return license information from a specified content. Args: q: query string of the form Yields: license information (dict) list if the content is found. """ sha1 = _lookup_content_sha1(q) if not sha1: return None lic = _first_element(idx_storage.content_fossology_license_get([sha1])) if not lic: return None return converters.from_swh({'id': sha1, 'facts': lic[sha1]}, hashess={'id'}) def lookup_origin(origin): """Return information about the origin matching dict origin. Args: origin: origin's dict with keys either 'id' or ('type' AND 'url') Returns: origin information as dict. """ origin_info = storage.origin_get(origin) if not origin_info: if 'id' in origin and origin['id']: msg = 'Origin with id %s not found!' % origin['id'] else: msg = 'Origin with type %s and url %s not found!' % \ (origin['type'], origin['url']) raise NotFoundExc(msg) return converters.from_origin(origin_info) def search_origin(url_pattern, offset=0, limit=50, regexp=False, with_visit=False): """Search for origins whose urls contain a provided string pattern or match a provided regular expression. Args: url_pattern: the string pattern to search for in origin urls offset: number of found origins to skip before returning results limit: the maximum number of found origins to return Returns: - lisf of origin information as dict. + list of origin information as dict. """ origins = storage.origin_search(url_pattern, offset, limit, regexp, with_visit) return map(converters.from_origin, origins) def lookup_person(person_id): """Return information about the person with id person_id. Args: person_id as string Returns: person information as dict. Raises: NotFoundExc if there is no person with the provided id. """ person = _first_element(storage.person_get([person_id])) if not person: raise NotFoundExc('Person with id %s not found' % person_id) return converters.from_person(person) def _to_sha1_bin(sha1_hex): _, sha1_git_bin = query.parse_hash_with_algorithms_or_throws( sha1_hex, ['sha1'], # HACK: sha1_git really 'Only sha1_git is supported.') return sha1_git_bin def lookup_directory(sha1_git): """Return information about the directory with id sha1_git. Args: sha1_git as string Returns: directory information as dict. """ empty_dir_sha1 = '4b825dc642cb6eb9a060e54bf8d69288fbee4904' if sha1_git == empty_dir_sha1: return [] sha1_git_bin = _to_sha1_bin(sha1_git) directory_entries = storage.directory_ls(sha1_git_bin) if directory_entries: return map(converters.from_directory_entry, directory_entries) else: raise NotFoundExc('Directory with sha1_git %s not found' % sha1_git) def lookup_directory_with_path(directory_sha1_git, path_string): """Return directory information for entry with path path_string w.r.t. root directory pointed by directory_sha1_git Args: - directory_sha1_git: sha1_git corresponding to the directory to which we append paths to (hopefully) find the entry - the relative path to the entry starting from the directory pointed by directory_sha1_git Raises: NotFoundExc if the directory entry is not found """ sha1_git_bin = _to_sha1_bin(directory_sha1_git) paths = path_string.strip(os.path.sep).split(os.path.sep) queried_dir = storage.directory_entry_get_by_path( sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths))) if not queried_dir: raise NotFoundExc(('Directory entry with path %s from %s not found') % (path_string, directory_sha1_git)) return converters.from_directory_entry(queried_dir) def lookup_release(release_sha1_git): """Return information about the release with sha1 release_sha1_git. Args: release_sha1_git: The release's sha1 as hexadecimal Returns: Release information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ sha1_git_bin = _to_sha1_bin(release_sha1_git) release = _first_element(storage.release_get([sha1_git_bin])) if not release: raise NotFoundExc('Release with sha1_git %s not found.' % release_sha1_git) return converters.from_release(release) def lookup_release_multiple(sha1_git_list): """Return information about the revisions identified with their sha1_git identifiers. Args: sha1_git_list: A list of revision sha1_git identifiers Returns: Release information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list) releases = storage.release_get(sha1_bin_list) or [] return (converters.from_release(r) for r in releases) def lookup_revision(rev_sha1_git): """Return information about the revision with sha1 revision_sha1_git. Args: revision_sha1_git: The revision's sha1 as hexadecimal Returns: Revision information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. NotFoundExc if there is no revision with the provided sha1_git. """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision with sha1_git %s not found.' % rev_sha1_git) return converters.from_revision(revision) def lookup_revision_multiple(sha1_git_list): """Return information about the revisions identified with their sha1_git identifiers. Args: sha1_git_list: A list of revision sha1_git identifiers Returns: Generator of revisions information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. """ sha1_bin_list = (_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list) revisions = storage.revision_get(sha1_bin_list) or [] return (converters.from_revision(r) for r in revisions) def lookup_revision_message(rev_sha1_git): """Return the raw message of the revision with sha1 revision_sha1_git. Args: revision_sha1_git: The revision's sha1 as hexadecimal Returns: Decoded revision message as dict {'message': } Raises: ValueError if the identifier provided is not of sha1 nature. NotFoundExc if the revision is not found, or if it has no message """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision with sha1_git %s not found.' % rev_sha1_git) if 'message' not in revision: raise NotFoundExc('No message for revision with sha1_git %s.' % rev_sha1_git) res = {'message': revision['message']} return res def lookup_revision_by(origin_id, branch_name="refs/heads/master", timestamp=None): """Lookup revisions by origin_id, branch_name and timestamp. If: - branch_name is not provided, lookup using 'refs/heads/master' as default. - ts is not provided, use the most recent Args: - origin_id: origin of the revision. - branch_name: revision's branch. - timestamp: revision's time frame. Yields: The revisions matching the criterions. Raises: NotFoundExc if no revision corresponds to the criterion """ res = _first_element(storage.revision_get_by(origin_id, branch_name, timestamp=timestamp, limit=1)) if not res: raise NotFoundExc('Revision for origin %s and branch %s not found.' % (origin_id, branch_name)) return converters.from_revision(res) def lookup_revision_log(rev_sha1_git, limit): """Return information about the revision with sha1 revision_sha1_git. Args: revision_sha1_git: The revision's sha1 as hexadecimal limit: the maximum number of revisions returned Returns: Revision information as dict. Raises: ValueError if the identifier provided is not of sha1 nature. NotFoundExc if there is no revision with the provided sha1_git. """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) revision_entries = storage.revision_log([sha1_git_bin], limit) if not revision_entries: raise NotFoundExc('Revision with sha1_git %s not found.' % rev_sha1_git) return map(converters.from_revision, revision_entries) def lookup_revision_log_by(origin_id, branch_name, timestamp, limit): """Return information about the revision with sha1 revision_sha1_git. Args: origin_id: origin of the revision branch_name: revision's branch timestamp: revision's time frame limit: the maximum number of revisions returned Returns: Revision information as dict. Raises: NotFoundExc if no revision corresponds to the criterion """ revision_entries = storage.revision_log_by(origin_id, branch_name, timestamp, limit=limit) if not revision_entries: return None return map(converters.from_revision, revision_entries) def lookup_revision_with_context_by(origin_id, branch_name, ts, sha1_git, limit=100): """Return information about revision sha1_git, limited to the sub-graph of all transitive parents of sha1_git_root. sha1_git_root being resolved through the lookup of a revision by origin_id, branch_name and ts. In other words, sha1_git is an ancestor of sha1_git_root. Args: - origin_id: origin of the revision. - branch_name: revision's branch. - timestamp: revision's time frame. - sha1_git: one of sha1_git_root's ancestors. - limit: limit the lookup to 100 revisions back. Returns: Pair of (root_revision, revision). Information on sha1_git if it is an ancestor of sha1_git_root including children leading to sha1_git_root Raises: - BadInputExc in case of unknown algo_hash or bad hash. - NotFoundExc if either revision is not found or if sha1_git is not an ancestor of sha1_git_root. """ rev_root = _first_element(storage.revision_get_by(origin_id, branch_name, timestamp=ts, limit=1)) if not rev_root: raise NotFoundExc('Revision with (origin_id: %s, branch_name: %s' ', ts: %s) not found.' % (origin_id, branch_name, ts)) return (converters.from_revision(rev_root), lookup_revision_with_context(rev_root, sha1_git, limit)) def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100): """Return information about revision sha1_git, limited to the sub-graph of all transitive parents of sha1_git_root. In other words, sha1_git is an ancestor of sha1_git_root. Args: sha1_git_root: latest revision. The type is either a sha1 (as an hex string) or a non converted dict. sha1_git: one of sha1_git_root's ancestors limit: limit the lookup to 100 revisions back Returns: Information on sha1_git if it is an ancestor of sha1_git_root including children leading to sha1_git_root Raises: BadInputExc in case of unknown algo_hash or bad hash NotFoundExc if either revision is not found or if sha1_git is not an ancestor of sha1_git_root """ sha1_git_bin = _to_sha1_bin(sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision %s not found' % sha1_git) if isinstance(sha1_git_root, str): sha1_git_root_bin = _to_sha1_bin(sha1_git_root) revision_root = _first_element(storage.revision_get([sha1_git_root_bin])) # noqa if not revision_root: raise NotFoundExc('Revision root %s not found' % sha1_git_root) else: sha1_git_root_bin = sha1_git_root['id'] revision_log = storage.revision_log([sha1_git_root_bin], limit) parents = {} children = defaultdict(list) for rev in revision_log: rev_id = rev['id'] parents[rev_id] = [] for parent_id in rev['parents']: parents[rev_id].append(parent_id) children[parent_id].append(rev_id) if revision['id'] not in parents: raise NotFoundExc('Revision %s is not an ancestor of %s' % (sha1_git, sha1_git_root)) revision['children'] = children[revision['id']] return converters.from_revision(revision) def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False): """Return information on directory pointed by revision with sha1_git. If dir_path is not provided, display top level directory. Otherwise, display the directory pointed by dir_path (if it exists). Args: sha1_git: revision's hash. dir_path: optional directory pointed to by that revision. with_data: boolean that indicates to retrieve the raw data if the path resolves to a content. Default to False (for the api) Returns: Information on the directory pointed to by that revision. Raises: BadInputExc in case of unknown algo_hash or bad hash. NotFoundExc either if the revision is not found or the path referenced does not exist. NotImplementedError in case of dir_path exists but do not reference a type 'dir' or 'file'. """ sha1_git_bin = _to_sha1_bin(sha1_git) revision = _first_element(storage.revision_get([sha1_git_bin])) if not revision: raise NotFoundExc('Revision %s not found' % sha1_git) dir_sha1_git_bin = revision['directory'] if dir_path: paths = dir_path.strip(os.path.sep).split(os.path.sep) entity = storage.directory_entry_get_by_path( dir_sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths))) if not entity: raise NotFoundExc( "Directory or File '%s' pointed to by revision %s not found" % (dir_path, sha1_git)) else: entity = {'type': 'dir', 'target': dir_sha1_git_bin} if entity['type'] == 'dir': directory_entries = storage.directory_ls(entity['target']) or [] return {'type': 'dir', 'path': '.' if not dir_path else dir_path, 'revision': sha1_git, 'content': map(converters.from_directory_entry, directory_entries)} elif entity['type'] == 'file': # content content = storage.content_find({'sha1_git': entity['target']}) if with_data: c = _first_element(storage.content_get([content['sha1']])) content['data'] = c['data'] return {'type': 'file', 'path': '.' if not dir_path else dir_path, 'revision': sha1_git, 'content': converters.from_content(content)} else: raise NotImplementedError('Entity of type %s not implemented.' % entity['type']) def lookup_content(q): """Lookup the content designed by q. Args: q: The release's sha1 as hexadecimal Raises: NotFoundExc if the requested content is not found """ algo, hash = query.parse_hash(q) c = storage.content_find({algo: hash}) if not c: raise NotFoundExc('Content with %s checksum equals to %s not found!' % (algo, hashutil.hash_to_hex(hash))) return converters.from_content(c) def lookup_content_raw(q): """Lookup the content defined by q. Args: q: query string of the form Returns: dict with 'sha1' and 'data' keys. data representing its raw data decoded. Raises: NotFoundExc if the requested content is not found or if the content bytes are not available in the storage """ c = lookup_content(q) content = _first_element(storage.content_get([c['checksums']['sha1']])) if not content: algo, hash = query.parse_hash(q) raise NotFoundExc('Bytes of content with %s checksum equals to %s ' 'are not available!' % (algo, hashutil.hash_to_hex(hash))) return converters.from_content(content) def stat_counters(): """Return the stat counters for Software Heritage Returns: A dict mapping textual labels to integer values. """ return storage.stat_counters() def _lookup_origin_visits(origin_id, last_visit=None, limit=10): """Yields the origin origin_ids' visits. Args: origin_id (int): origin to list visits for last_visit (int): last visit to lookup from limit (int): Number of elements max to display Yields: Dictionaries of origin_visit for that origin """ limit = min(limit, MAX_LIMIT) yield from storage.origin_visit_get( origin_id, last_visit=last_visit, limit=limit) def lookup_origin_visits(origin_id, last_visit=None, per_page=10): """Yields the origin origin_ids' visits. Args: origin_id: origin to list visits for Yields: Dictionaries of origin_visit for that origin """ visits = _lookup_origin_visits(origin_id, last_visit=last_visit, limit=per_page) for visit in visits: yield converters.from_origin_visit(visit) def lookup_origin_visit(origin_id, visit_id): """Return information about visit visit_id with origin origin_id. Args: origin_id: origin concerned by the visit visit_id: the visit identifier to lookup Yields: The dict origin_visit concerned """ visit = storage.origin_visit_get_by(origin_id, visit_id) if not visit: raise NotFoundExc('Origin with id %s or its visit ' 'with id %s not found!' % (origin_id, visit_id)) return converters.from_origin_visit(visit) def lookup_snapshot_size(snapshot_id): """Count the number of branches in the snapshot with the given id Args: snapshot_id (str): sha1 identifier of the snapshot Returns: dict: A dict whose keys are the target types of branches and values their corresponding amount """ snapshot_id_bin = _to_sha1_bin(snapshot_id) snapshot_size = storage.snapshot_count_branches(snapshot_id_bin) if 'revision' not in snapshot_size: snapshot_size['revision'] = 0 if 'release' not in snapshot_size: snapshot_size['release'] = 0 return snapshot_size def lookup_snapshot(snapshot_id, branches_from='', branches_count=None, target_types=None): """Return information about a snapshot, aka the list of named branches found during a specific visit of an origin. Args: snapshot_id (str): sha1 identifier of the snapshot branches_from (str): optional parameter used to skip branches whose name is lesser than it before returning them branches_count (int): optional parameter used to restrain the amount of returned branches target_types (list): optional parameter used to filter the target types of branch to return (possible values that can be contained in that list are `'content', 'directory', 'revision', 'release', 'snapshot', 'alias'`) Returns: A dict filled with the snapshot content. """ snapshot_id_bin = _to_sha1_bin(snapshot_id) snapshot = storage.snapshot_get_branches(snapshot_id_bin, branches_from.encode(), branches_count, target_types) if not snapshot: raise NotFoundExc('Snapshot with id %s not found!' % snapshot_id) return converters.from_snapshot(snapshot) def lookup_latest_origin_snapshot(origin_id, allowed_statuses=None): """Return information about the latest snapshot of an origin. .. warning:: At most 1000 branches contained in the snapshot will be returned for performance reasons. Args: origin_id: integer identifier of the origin allowed_statuses: list of visit statuses considered to find the latest snapshot for the visit. For instance, ``allowed_statuses=['full']`` will only consider visits that have successfully run to completion. Returns: A dict filled with the snapshot content. """ snapshot = storage.snapshot_get_latest(origin_id, allowed_statuses) return converters.from_snapshot(snapshot) def lookup_entity_by_uuid(uuid): """Return the entity's hierarchy from its uuid. Args: uuid: entity's identifier. Returns: List of hierarchy entities from the entity with uuid. """ uuid = query.parse_uuid4(uuid) for entity in storage.entity_get(uuid): entity = converters.from_swh(entity, convert={'last_seen', 'uuid'}, convert_fn=lambda x: str(x)) yield entity def lookup_revision_through(revision, limit=100): """Retrieve a revision from the criterion stored in revision dictionary. Args: revision: Dictionary of criterion to lookup the revision with. Here are the supported combination of possible values: - origin_id, branch_name, ts, sha1_git - origin_id, branch_name, ts - sha1_git_root, sha1_git - sha1_git Returns: None if the revision is not found or the actual revision. """ if 'origin_id' in revision and \ 'branch_name' in revision and \ 'ts' in revision and \ 'sha1_git' in revision: return lookup_revision_with_context_by(revision['origin_id'], revision['branch_name'], revision['ts'], revision['sha1_git'], limit) if 'origin_id' in revision and \ 'branch_name' in revision and \ 'ts' in revision: return lookup_revision_by(revision['origin_id'], revision['branch_name'], revision['ts']) if 'sha1_git_root' in revision and \ 'sha1_git' in revision: return lookup_revision_with_context(revision['sha1_git_root'], revision['sha1_git'], limit) if 'sha1_git' in revision: return lookup_revision(revision['sha1_git']) # this should not happen raise NotImplementedError('Should not happen!') def lookup_directory_through_revision(revision, path=None, limit=100, with_data=False): """Retrieve the directory information from the revision. Args: revision: dictionary of criterion representing a revision to lookup path: directory's path to lookup. limit: optional query parameter to limit the revisions log (default to 100). For now, note that this limit could impede the transitivity conclusion about sha1_git not being an ancestor of. with_data: indicate to retrieve the content's raw data if path resolves to a content. Returns: The directory pointing to by the revision criterions at path. """ rev = lookup_revision_through(revision, limit) if not rev: raise NotFoundExc('Revision with criterion %s not found!' % revision) return (rev['id'], lookup_directory_with_revision(rev['id'], path, with_data)) def vault_cook(obj_type, obj_id, email=None): """Cook a vault bundle. """ return vault.cook(obj_type, obj_id, email=email) def vault_fetch(obj_type, obj_id): """Fetch a vault bundle. """ return vault.fetch(obj_type, obj_id) def vault_progress(obj_type, obj_id): """Get the current progress of a vault bundle. """ return vault.progress(obj_type, obj_id) def diff_revision(rev_id): """Get the list of file changes (insertion / deletion / modification / renaming) for a particular revision. """ rev_sha1_git_bin = _to_sha1_bin(rev_id) changes = storage.diff_revision(rev_sha1_git_bin, track_renaming=True) for change in changes: change['from'] = converters.from_directory_entry(change['from']) change['to'] = converters.from_directory_entry(change['to']) if change['from_path']: change['from_path'] = change['from_path'].decode('utf-8') if change['to_path']: change['to_path'] = change['to_path'].decode('utf-8') return changes diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py index 38343bab..8d800999 100644 --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,412 +1,412 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import docutils.parsers.rst import docutils.utils import re import requests from datetime import datetime, timezone from dateutil import parser as date_parser from dateutil import tz from django.core.cache import cache from django.urls import reverse as django_reverse from django.http import QueryDict from swh.model.exceptions import ValidationError from swh.model.identifiers import ( persistent_identifier, parse_persistent_identifier, CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT ) from swh.web.common import service from swh.web.common.exc import BadInputExc from swh.web.config import get_config swh_object_icons = { 'branch': 'fa fa-code-fork', 'branches': 'fa fa-code-fork', 'content': 'fa fa-file-text', 'directory': 'fa fa-folder', 'person': 'fa fa-user', 'revisions history': 'fa fa-history', 'release': 'fa fa-tag', 'releases': 'fa fa-tag', 'revision': 'octicon octicon-git-commit', 'snapshot': 'fa fa-camera', 'visits': 'fa fa-calendar', } def reverse(viewname, url_args=None, query_params=None, current_app=None, urlconf=None): """An override of django reverse function supporting query parameters. Args: viewname (str): the name of the django view from which to compute a url url_args (dict): dictionary of url arguments indexed by their names query_params (dict): dictionary of query parameters to append to the reversed url - current_app (str): the name of the django app tighted to the view + current_app (str): the name of the django app tighten to the view urlconf (str): url configuration module Returns: str: the url of the requested view with processed arguments and query parameters """ if url_args: url_args = {k: v for k, v in url_args.items() if v is not None} url = django_reverse(viewname, urlconf=urlconf, kwargs=url_args, current_app=current_app) if query_params: query_params = {k: v for k, v in query_params.items() if v} if query_params and len(query_params) > 0: query_dict = QueryDict('', mutable=True) for k in sorted(query_params.keys()): query_dict[k] = query_params[k] url += ('?' + query_dict.urlencode(safe='/;:')) return url def datetime_to_utc(date): """Returns datetime in UTC without timezone info Args: date (datetime.datetime): input datetime with timezone info Returns: datetime.datetime: datetime in UTC without timezone info """ if date.tzinfo: return date.astimezone(tz.gettz('UTC')).replace(tzinfo=timezone.utc) else: return date def parse_timestamp(timestamp): """Given a time or timestamp (as string), parse the result as UTC datetime. Returns: datetime.datetime: a timezone-aware datetime representing the parsed value or None if the parsing fails. Samples: - 2016-01-12 - 2016-01-12T09:19:12+0100 - Today is January 1, 2047 at 8:21:00AM - 1452591542 """ if not timestamp: return None try: date = date_parser.parse(timestamp, ignoretz=False, fuzzy=True) return datetime_to_utc(date) except Exception: try: return datetime.utcfromtimestamp(float(timestamp)).replace( tzinfo=timezone.utc) except (ValueError, OverflowError) as e: raise BadInputExc(e) def shorten_path(path): """Shorten the given path: for each hash present, only return the first 8 characters followed by an ellipsis""" sha256_re = r'([0-9a-f]{8})[0-9a-z]{56}' sha1_re = r'([0-9a-f]{8})[0-9a-f]{32}' ret = re.sub(sha256_re, r'\1...', path) return re.sub(sha1_re, r'\1...', ret) def format_utc_iso_date(iso_date, fmt='%d %B %Y, %H:%M UTC'): - """Turns a string reprensation of an ISO 8601 date string + """Turns a string representation of an ISO 8601 date string to UTC and format it into a more human readable one. For instance, from the following input string: '2017-05-04T13:27:13+02:00' the following one is returned: '04 May 2017, 11:27 UTC'. Custom format string may also be provided as parameter Args: iso_date (str): a string representation of an ISO 8601 date fmt (str): optional date formatting string Returns: str: a formatted string representation of the input iso date """ if not iso_date: return iso_date date = parse_timestamp(iso_date) return date.strftime(fmt) def gen_path_info(path): """Function to generate path data navigation for use with a breadcrumb in the swh web ui. For instance, from a path /folder1/folder2/folder3, it returns the following list:: [{'name': 'folder1', 'path': 'folder1'}, {'name': 'folder2', 'path': 'folder1/folder2'}, {'name': 'folder3', 'path': 'folder1/folder2/folder3'}] Args: path: a filesystem path Returns: list: a list of path data for navigation as illustrated above. """ path_info = [] if path: sub_paths = path.strip('/').split('/') path_from_root = '' for p in sub_paths: path_from_root += '/' + p path_info.append({'name': p, 'path': path_from_root.strip('/')}) return path_info def get_origin_visits(origin_info): """Function that returns the list of visits for a swh origin. That list is put in cache in order to speedup the navigation in the swh web browse ui. Args: origin_id (int): the id of the swh origin to fetch visits from Returns: list: A list of dict describing the origin visits with the following keys: * **date**: UTC visit date in ISO format, * **origin**: the origin id * **status**: the visit status, either *full* or *partial* * **visit**: the visit id Raises: NotFoundExc: if the origin is not found """ cache_entry_id = 'origin_%s_visits' % origin_info['id'] cache_entry = cache.get(cache_entry_id) last_snapshot = service.lookup_latest_origin_snapshot(origin_info['id']) if cache_entry and \ (not last_snapshot or last_snapshot['id'] == cache_entry[-1]['snapshot']): return cache_entry origin_visits = [] per_page = service.MAX_LIMIT last_visit = None while 1: visits = list(service.lookup_origin_visits(origin_info['id'], last_visit=last_visit, per_page=per_page)) origin_visits += visits if len(visits) < per_page: break else: if not last_visit: last_visit = per_page else: last_visit += per_page def _visit_sort_key(visit): ts = parse_timestamp(visit['date']).timestamp() return ts + (float(visit['visit']) / 10e3) for v in origin_visits: if 'metadata' in v: del v['metadata'] origin_visits = [dict(t) for t in set([tuple(d.items()) for d in origin_visits])] origin_visits = sorted(origin_visits, key=lambda v: _visit_sort_key(v)) cache.set(cache_entry_id, origin_visits) return origin_visits def get_swh_persistent_id(object_type, object_id, scheme_version=1): """ Returns the persistent identifier for a swh object based on: * the object type * the object id * the swh identifiers scheme version Args: object_type (str): the swh object type (content/directory/release/revision/snapshot) object_id (str): the swh object id (hexadecimal representation of its hash value) scheme_version (int): the scheme version of the swh persistent identifiers Returns: str: the swh object persistent identifier Raises: BadInputExc: if the provided parameters do not enable to generate a valid identifier """ try: swh_id = persistent_identifier(object_type, object_id, scheme_version) except ValidationError as e: raise BadInputExc('Invalid object (%s) for swh persistent id. %s' % (object_id, e)) else: return swh_id def resolve_swh_persistent_id(swh_id, query_params=None): """ Try to resolve a SWH persistent id into an url for browsing the pointed object. Args: swh_id (str): a SWH persistent identifier query_params (django.http.QueryDict): optional dict filled with query parameters to append to the browse url Returns: dict: a dict with the following keys: * **swh_id_parsed (swh.model.identifiers.PersistentId)**: the parsed identifier * **browse_url (str)**: the url for browsing the pointed object Raises: BadInputExc: if the provided identifier can not be parsed """ # noqa try: swh_id_parsed = parse_persistent_identifier(swh_id) object_type = swh_id_parsed.object_type object_id = swh_id_parsed.object_id browse_url = None query_dict = QueryDict('', mutable=True) if query_params and len(query_params) > 0: for k in sorted(query_params.keys()): query_dict[k] = query_params[k] if 'origin' in swh_id_parsed.metadata: query_dict['origin'] = swh_id_parsed.metadata['origin'] if object_type == CONTENT: query_string = 'sha1_git:' + object_id fragment = '' if 'lines' in swh_id_parsed.metadata: lines = swh_id_parsed.metadata['lines'].split('-') fragment += '#L' + lines[0] if len(lines) > 1: fragment += '-L' + lines[1] browse_url = reverse('browse-content', url_args={'query_string': query_string}, query_params=query_dict) + fragment elif object_type == DIRECTORY: browse_url = reverse('browse-directory', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == RELEASE: browse_url = reverse('browse-release', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == REVISION: browse_url = reverse('browse-revision', url_args={'sha1_git': object_id}, query_params=query_dict) elif object_type == SNAPSHOT: browse_url = reverse('browse-snapshot', url_args={'snapshot_id': object_id}, query_params=query_dict) except ValidationError as ve: raise BadInputExc('Error when parsing identifier. %s' % ' '.join(ve.messages)) else: return {'swh_id_parsed': swh_id_parsed, 'browse_url': browse_url} def parse_rst(text, report_level=2): """ Parse a reStructuredText string with docutils. Args: text (str): string with reStructuredText markups in it report_level (int): level of docutils report messages to print (1 info 2 warning 3 error 4 severe 5 none) Returns: docutils.nodes.document: a parsed docutils document """ parser = docutils.parsers.rst.Parser() components = (docutils.parsers.rst.Parser,) settings = docutils.frontend.OptionParser( components=components).get_default_values() settings.report_level = report_level document = docutils.utils.new_document('rst-doc', settings=settings) parser.parse(text, document) return document def get_client_ip(request): """ Return the client IP address from an incoming HTTP request. Args: request (django.http.HttpRequest): the incoming HTTP request Returns: str: The client IP address """ x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') if x_forwarded_for: ip = x_forwarded_for.split(',')[0] else: ip = request.META.get('REMOTE_ADDR') return ip def is_recaptcha_valid(request, recaptcha_response): """ Verify if the response for Google reCAPTCHA is valid. Args: request (django.http.HttpRequest): the incoming HTTP request recaptcha_response (str): the reCAPTCHA response Returns: bool: Wether the reCAPTCHA response is valid or not """ config = get_config() return requests.post( config['grecaptcha']['validation_url'], data={ 'secret': config['grecaptcha']['private_key'], 'response': recaptcha_response, 'remoteip': get_client_ip(request) }, verify=True ).json().get("success", False) def context_processor(request): """ Django context processor used to inject variables in all swh-web templates. """ return {'swh_object_icons': swh_object_icons} diff --git a/swh/web/templates/browse/vault-ui.html b/swh/web/templates/browse/vault-ui.html index 15522985..692ff44c 100644 --- a/swh/web/templates/browse/vault-ui.html +++ b/swh/web/templates/browse/vault-ui.html @@ -1,69 +1,69 @@ {% extends "./layout.html" %} {% comment %} Copyright (C) 2017-2018 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load render_bundle from webpack_loader %} {% block navbar-content %}

Download archived software

{% endblock %} {% block browse-content %}

This interface enables to track the status of the different Software Heritage Vault cooking tasks created while browsing the archive.

Once a cooking task is finished, a link will be made available in order to download the associated archive.

Object type Object id Cooking status
{% endblock %}