diff --git a/cypress/fixtures/save-task-info.json b/cypress/fixtures/save-task-info.json new file mode 100644 index 00000000..dafba3d3 --- /dev/null +++ b/cypress/fixtures/save-task-info.json @@ -0,0 +1,16 @@ +{ + "scheduled": "2020-06-24T11:48:12.561643+00:00", + "started": "2020-06-24T12:59:11.103188+00:00", + "ended": "2020-06-24T12:59:12.065313+00:00", + "status": "eventful", + "type": "load-git", + "arguments": { + "args": [], + "kwargs": { + "url": "https://gitlab.inria.fr/solverstack/maphys/maphys/" + } + }, + "duration": "1.0600971020758152", + "message": "[2020-06-24 12:59:12,063: INFO/ForkPoolWorker-161] Task swh.loader.git.tasks.UpdateGitRepository[4ff8b555-9535-4e75-b8ec-8e76165e14ec] succeeded in 1.0600971020758152s: {'status': 'eventful'}", + "name": "swh.loader.git.tasks.UpdateGitRepository" +} \ No newline at end of file diff --git a/cypress/integration/origin-save.spec.js b/cypress/integration/origin-save.spec.js index cb0cc0a7..b7ab39b6 100644 --- a/cypress/integration/origin-save.spec.js +++ b/cypress/integration/origin-save.spec.js @@ -1,192 +1,234 @@ /** * Copyright (C) 2019-2020 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ let url; let origin; const $ = Cypress.$; const saveCodeMsg = { 'success': 'The "save code now" request has been accepted and will be processed as soon as possible.', 'warning': 'The "save code now" request has been put in pending state and may be accepted for processing after manual review.', 'rejected': 'The "save code now" request has been rejected because the provided origin url is blacklisted.', 'rateLimit': 'The rate limit for "save code now" requests has been reached. 
Please try again later.', 'unknownError': 'An unexpected error happened when submitting the "save code now request', 'csrfError': 'CSRF Failed: Referrer checking failed - no Referrer.' }; function makeOriginSaveRequest(originType, originUrl) { cy.get('#swh-input-visit-type') .select(originType) .get('#swh-input-origin-url') .type(originUrl) .get('#swh-save-origin-form') .submit(); } function checkAlertVisible(alertType, msg) { cy.get('#swh-origin-save-request-status') .should('be.visible') .find(`.alert-${alertType}`) .should('be.visible') .and('contain', msg); } // Stub requests to save an origin function stubSaveRequest(requestUrl, objectType, status, originUrl, taskStatus, responseStatus = 200, errorMessage = '') { let response; if (responseStatus !== 200 && errorMessage) { response = {'detail': errorMessage}; } else { response = genOriginSaveResponse(objectType, status, originUrl, Date().toString(), taskStatus); } cy.route({ method: 'POST', status: responseStatus, url: requestUrl, response: response }).as('saveRequest'); } // Mocks API response : /save/(:object_type)/(:origin_url) // object_type : {'git', 'hg', 'svn'} function genOriginSaveResponse(objectType, saveRequestStatus, originUrl, saveRequestDate, saveTaskStatus) { return { 'visit_type': objectType, 'save_request_status': saveRequestStatus, 'origin_url': originUrl, 'id': 1, 'save_request_date': saveRequestDate, 'save_task_status': saveTaskStatus, 'visit_date': null }; }; describe('Origin Save Tests', function() { before(function() { url = this.Urls.origin_save(); origin = this.origin[0]; this.originSaveUrl = this.Urls.origin_save_request(origin.type, origin.url); }); beforeEach(function() { cy.visit(url); cy.server(); }); it('should display accepted message when accepted', function() { stubSaveRequest(this.originSaveUrl, origin.type, 'accepted', origin.url, 'not yet scheduled'); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('success', 
saveCodeMsg['success']); }); }); it('should validate gitlab subproject url', function() { const gitlabSubProjectUrl = 'https://gitlab.com/user/project/sub/'; const originSaveUrl = this.Urls.origin_save_request('git', gitlabSubProjectUrl); stubSaveRequest(originSaveUrl, 'git', 'accepted', gitlabSubProjectUrl, 'not yet scheduled'); makeOriginSaveRequest('git', gitlabSubProjectUrl); cy.wait('@saveRequest').then(() => { checkAlertVisible('success', saveCodeMsg['success']); }); }); it('should display warning message when pending', function() { stubSaveRequest(this.originSaveUrl, origin.type, 'pending', origin.url, 'not created'); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('warning', saveCodeMsg['warning']); }); }); it('should show error when csrf validation failed (status: 403)', function() { stubSaveRequest(this.originSaveUrl, origin.type, 'rejected', origin.url, 'not created', 403, saveCodeMsg['csrfError']); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['csrfError']); }); }); it('should show error when origin is rejected (status: 403)', function() { stubSaveRequest(this.originSaveUrl, origin.type, 'rejected', origin.url, 'not created', 403, saveCodeMsg['rejected']); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['rejected']); }); }); it('should show error when rate limited (status: 429)', function() { stubSaveRequest(this.originSaveUrl, origin.type, 'Request was throttled. 
Expected available in 60 seconds.', origin.url, 'not created', 429); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['rateLimit']); }); }); it('should show error when unknown error occurs (status other than 200, 403, 429)', function() { stubSaveRequest(this.originSaveUrl, origin.type, 'Error', origin.url, 'not created', 406); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['unknownError']); }); }); it('should display origin save info in the requests table', function() { - cy.fixture('origin-save').then(originSaveJSON => { - cy.route('GET', '/save/requests/list/**', originSaveJSON); - cy.get('#swh-origin-save-requests-list-tab').click(); - cy.get('tbody tr').then(rows => { - let i = 0; - for (let row of rows) { - const cells = row.cells; - const requestDateStr = new Date(originSaveJSON.data[i].save_request_date).toLocaleString(); - const saveStatus = originSaveJSON.data[i].save_task_status; - assert.equal($(cells[0]).text(), requestDateStr); - assert.equal($(cells[1]).text(), originSaveJSON.data[i].visit_type); - let html = ''; - if (saveStatus === 'succeed') { - let browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${originSaveJSON.data[i].origin_url}`; - browseOriginUrl += `&timestamp=${originSaveJSON.data[i].visit_date}`; - html += `${originSaveJSON.data[i].origin_url}`; - } else { - html += originSaveJSON.data[i].origin_url; - } - html += ` `; - html += ''; - assert.equal($(cells[2]).html(), html); - assert.equal($(cells[3]).text(), originSaveJSON.data[i].save_request_status); - assert.equal($(cells[4]).text(), saveStatus); - ++i; + cy.fixture('origin-save').as('originSaveJSON'); + cy.route('GET', '/save/requests/list/**', '@originSaveJSON'); + cy.get('#swh-origin-save-requests-list-tab').click(); + cy.get('tbody tr').then(rows => { + let i = 0; + for (let row of rows) { + const cells = row.cells; + 
const requestDateStr = new Date(this.originSaveJSON.data[i].save_request_date).toLocaleString(); + const saveStatus = this.originSaveJSON.data[i].save_task_status; + assert.equal($(cells[0]).text(), requestDateStr); + assert.equal($(cells[1]).text(), this.originSaveJSON.data[i].visit_type); + let html = ''; + if (saveStatus === 'succeed') { + let browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${this.originSaveJSON.data[i].origin_url}`; + browseOriginUrl += `&timestamp=${this.originSaveJSON.data[i].visit_date}`; + html += `${this.originSaveJSON.data[i].origin_url}`; + } else { + html += this.originSaveJSON.data[i].origin_url; } - }); + html += ` `; + html += ''; + assert.equal($(cells[2]).html(), html); + assert.equal($(cells[3]).text(), this.originSaveJSON.data[i].save_request_status); + assert.equal($(cells[4]).text(), saveStatus); + ++i; + } }); }); + it('should display/close task info popover when clicking on the info button', function() { + cy.fixture('origin-save').as('originSaveJSON'); + cy.fixture('save-task-info').as('saveTaskInfoJSON'); + cy.route('GET', '/save/requests/list/**', '@originSaveJSON'); + cy.route('GET', '/save/task/info/**', '@saveTaskInfoJSON'); + + cy.get('#swh-origin-save-requests-list-tab').click(); + cy.get('.swh-save-request-info') + .eq(0) + .click(); + + cy.get('.swh-save-request-info-popover') + .should('be.visible'); + + cy.get('.swh-save-request-info') + .eq(0) + .click(); + + cy.get('.swh-save-request-info-popover') + .should('not.be.visible'); + }); + + it('should hide task info popover when clicking on the close button', function() { + cy.fixture('origin-save').as('originSaveJSON'); + cy.fixture('save-task-info').as('saveTaskInfoJSON'); + cy.route('GET', '/save/requests/list/**', '@originSaveJSON'); + cy.route('GET', '/save/task/info/**', '@saveTaskInfoJSON'); + + cy.get('#swh-origin-save-requests-list-tab').click(); + cy.get('.swh-save-request-info') + .eq(0) + .click(); + + cy.get('.swh-save-request-info-popover') 
+ .should('be.visible'); + + cy.get('.swh-save-request-info-close') + .click(); + + cy.get('.swh-save-request-info-popover') + .should('not.be.visible'); + }); + }); diff --git a/swh/web/admin/origin_save.py b/swh/web/admin/origin_save.py index 41a740d0..e3cf650d 100644 --- a/swh/web/admin/origin_save.py +++ b/swh/web/admin/origin_save.py @@ -1,229 +1,215 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.conf import settings from django.contrib.admin.views.decorators import staff_member_required from django.core.exceptions import ObjectDoesNotExist from django.core.paginator import Paginator from django.http import HttpResponse from django.shortcuts import render from django.views.decorators.http import require_POST from swh.web.admin.adminurls import admin_route from swh.web.common.models import ( SaveAuthorizedOrigin, SaveUnauthorizedOrigin, SaveOriginRequest, ) from swh.web.common.origin_save import ( create_save_origin_request, - get_save_origin_task_info, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, ) @admin_route(r"origin/save/", view_name="admin-origin-save") @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save(request): return render(request, "admin/origin-save.html") def _datatables_origin_urls_response(request, urls_query_set): search_value = request.GET["search[value]"] if search_value: urls_query_set = urls_query_set.filter(url__icontains=search_value) column_order = request.GET["order[0][column]"] field_order = request.GET["columns[%s][name]" % column_order] order_dir = request.GET["order[0][dir]"] if order_dir == "desc": field_order = "-" + field_order urls_query_set = urls_query_set.order_by(field_order) table_data = {} table_data["draw"] = int(request.GET["draw"]) 
table_data["recordsTotal"] = urls_query_set.count() table_data["recordsFiltered"] = urls_query_set.count() length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 paginator = Paginator(urls_query_set, length) urls_query_set = paginator.page(page).object_list table_data["data"] = [{"url": u.url} for u in urls_query_set] table_data_json = json.dumps(table_data, separators=(",", ": ")) return HttpResponse(table_data_json, content_type="application/json") @admin_route( r"origin/save/authorized_urls/list/", view_name="admin-origin-save-authorized-urls-list", ) @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_authorized_urls_list(request): authorized_urls = SaveAuthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, authorized_urls) @admin_route( r"origin/save/authorized_urls/add/(?P.+)/", view_name="admin-origin-save-add-authorized-url", ) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_add_authorized_url(request, origin_url): try: SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: # add the new authorized url SaveAuthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = SaveOriginRequest.objects.filter( origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING ) # create origin save tasks for previously pending requests for psr in pending_save_requests: create_save_origin_request(psr.visit_type, psr.origin_url) status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route( r"origin/save/authorized_urls/remove/(?P.+)/", view_name="admin-origin-save-remove-authorized-url", ) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_remove_authorized_url(request, origin_url): try: entry = SaveAuthorizedOrigin.objects.get(url=origin_url) 
except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route( r"origin/save/unauthorized_urls/list/", view_name="admin-origin-save-unauthorized-urls-list", ) @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_unauthorized_urls_list(request): unauthorized_urls = SaveUnauthorizedOrigin.objects.all() return _datatables_origin_urls_response(request, unauthorized_urls) @admin_route( r"origin/save/unauthorized_urls/add/(?P.+)/", view_name="admin-origin-save-add-unauthorized-url", ) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_add_unauthorized_url(request, origin_url): try: SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) # check if pending save requests with that url prefix exist pending_save_requests = SaveOriginRequest.objects.filter( origin_url__startswith=origin_url, status=SAVE_REQUEST_PENDING ) # mark pending requests as rejected for psr in pending_save_requests: psr.status = SAVE_REQUEST_REJECTED psr.save() status_code = 200 else: status_code = 400 return HttpResponse(status=status_code) @admin_route( r"origin/save/unauthorized_urls/remove/(?P.+)/", view_name="admin-origin-save-remove-unauthorized-url", ) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_remove_unauthorized_url(request, origin_url): try: entry = SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) @admin_route( r"origin/save/request/accept/(?P.+)/url/(?P.+)/", view_name="admin-origin-save-request-accept", ) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_accept(request, visit_type, origin_url): try: 
SaveAuthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveAuthorizedOrigin.objects.create(url=origin_url) create_save_origin_request(visit_type, origin_url) return HttpResponse(status=200) @admin_route( r"origin/save/request/reject/(?P.+)/url/(?P.+)/", view_name="admin-origin-save-request-reject", ) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_reject(request, visit_type, origin_url): try: SaveUnauthorizedOrigin.objects.get(url=origin_url) except ObjectDoesNotExist: SaveUnauthorizedOrigin.objects.create(url=origin_url) sor = SaveOriginRequest.objects.get( visit_type=visit_type, origin_url=origin_url, status=SAVE_REQUEST_PENDING ) sor.status = SAVE_REQUEST_REJECTED sor.save() return HttpResponse(status=200) @admin_route( r"origin/save/request/remove/(?P.+)/", view_name="admin-origin-save-request-remove", ) @require_POST @staff_member_required(view_func=None, login_url=settings.LOGIN_URL) def _admin_origin_save_request_remove(request, sor_id): try: entry = SaveOriginRequest.objects.get(id=sor_id) except ObjectDoesNotExist: status_code = 404 else: entry.delete() status_code = 200 return HttpResponse(status=status_code) - - -@admin_route( - r"origin/save/task/info/(?P.+)/", - view_name="admin-origin-save-task-info", -) -@staff_member_required(view_func=None, login_url=settings.LOGIN_URL) -def _save_origin_task_info(request, save_request_id): - request_info = get_save_origin_task_info(save_request_id) - for date_field in ("scheduled", "started", "ended"): - if date_field in request_info and request_info[date_field] is not None: - request_info[date_field] = request_info[date_field].isoformat() - return HttpResponse(json.dumps(request_info), content_type="application/json") diff --git a/swh/web/assets/src/bundles/admin/origin-save.js b/swh/web/assets/src/bundles/admin/origin-save.js index 414d2016..b2a22803 100644 --- a/swh/web/assets/src/bundles/admin/origin-save.js +++ 
b/swh/web/assets/src/bundles/admin/origin-save.js @@ -1,451 +1,361 @@ /** * Copyright (C) 2018-2020 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {handleFetchError, csrfPost, htmlAlert} from 'utils/functions'; import {swhSpinnerSrc} from 'utils/constants'; let authorizedOriginTable; let unauthorizedOriginTable; let pendingSaveRequestsTable; let acceptedSaveRequestsTable; let rejectedSaveRequestsTable; function enableRowSelection(tableSel) { $(`${tableSel} tbody`).on('click', 'tr', function() { if ($(this).hasClass('selected')) { $(this).removeClass('selected'); $(tableSel).closest('.tab-pane').find('.swh-action-need-selection').prop('disabled', true); } else { $(`${tableSel} tr.selected`).removeClass('selected'); $(this).addClass('selected'); $(tableSel).closest('.tab-pane').find('.swh-action-need-selection').prop('disabled', false); } }); } export function initOriginSaveAdmin() { $(document).ready(() => { $.fn.dataTable.ext.errMode = 'throw'; authorizedOriginTable = $('#swh-authorized-origin-urls').DataTable({ serverSide: true, ajax: Urls.admin_origin_save_authorized_urls_list(), columns: [{data: 'url', name: 'url'}], scrollY: '50vh', scrollCollapse: true, info: false }); enableRowSelection('#swh-authorized-origin-urls'); swh.webapp.addJumpToPagePopoverToDataTable(authorizedOriginTable); unauthorizedOriginTable = $('#swh-unauthorized-origin-urls').DataTable({ serverSide: true, ajax: Urls.admin_origin_save_unauthorized_urls_list(), columns: [{data: 'url', name: 'url'}], scrollY: '50vh', scrollCollapse: true, info: false }); enableRowSelection('#swh-unauthorized-origin-urls'); swh.webapp.addJumpToPagePopoverToDataTable(unauthorizedOriginTable); let columnsData = [ { data: 'id', name: 'id', visible: false, searchable: false }, { data: 'save_request_date', name: 
'request_date', render: (data, type, row) => { if (type === 'display') { let date = new Date(data); return date.toLocaleString(); } return data; } }, { data: 'visit_type', name: 'visit_type' }, { data: 'origin_url', name: 'origin_url', render: (data, type, row) => { if (type === 'display') { let html = ''; const sanitizedURL = $.fn.dataTable.render.text().display(data); if (row.save_task_status === 'succeed') { let browseOriginUrl = `${Urls.browse_origin()}?origin_url=${sanitizedURL}`; browseOriginUrl += `&timestamp=${row.visit_date}`; html += `${sanitizedURL}`; } else { html += sanitizedURL; } html += ` `; return html; } return data; } } ]; pendingSaveRequestsTable = $('#swh-origin-save-pending-requests').DataTable({ serverSide: true, processing: true, language: { processing: `` }, ajax: Urls.origin_save_requests_list('pending'), searchDelay: 1000, columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); enableRowSelection('#swh-origin-save-pending-requests'); swh.webapp.addJumpToPagePopoverToDataTable(pendingSaveRequestsTable); rejectedSaveRequestsTable = $('#swh-origin-save-rejected-requests').DataTable({ serverSide: true, processing: true, language: { processing: `` }, ajax: Urls.origin_save_requests_list('rejected'), searchDelay: 1000, columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); enableRowSelection('#swh-origin-save-rejected-requests'); swh.webapp.addJumpToPagePopoverToDataTable(rejectedSaveRequestsTable); columnsData.push({ data: 'save_task_status', name: 'save_task_status' }); columnsData.push({ name: 'info', render: (data, type, row) => { if (row.save_task_status === 'succeed' || row.save_task_status === 'failed') { return '`; + `onclick="swh.save.displaySaveRequestInfo(event, ${row.id})">`; } else { return ''; } } }); acceptedSaveRequestsTable = $('#swh-origin-save-accepted-requests').DataTable({ 
serverSide: true, processing: true, language: { processing: `` }, ajax: Urls.origin_save_requests_list('accepted'), searchDelay: 1000, columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); enableRowSelection('#swh-origin-save-accepted-requests'); swh.webapp.addJumpToPagePopoverToDataTable(acceptedSaveRequestsTable); $('#swh-origin-save-requests-nav-item').on('shown.bs.tab', () => { pendingSaveRequestsTable.draw(); }); $('#swh-origin-save-url-filters-nav-item').on('shown.bs.tab', () => { authorizedOriginTable.draw(); }); $('#swh-authorized-origins-tab').on('shown.bs.tab', () => { authorizedOriginTable.draw(); }); $('#swh-unauthorized-origins-tab').on('shown.bs.tab', () => { unauthorizedOriginTable.draw(); }); $('#swh-save-requests-pending-tab').on('shown.bs.tab', () => { pendingSaveRequestsTable.draw(); }); $('#swh-save-requests-accepted-tab').on('shown.bs.tab', () => { acceptedSaveRequestsTable.draw(); }); $('#swh-save-requests-rejected-tab').on('shown.bs.tab', () => { rejectedSaveRequestsTable.draw(); }); $('#swh-save-requests-pending-tab').click(() => { pendingSaveRequestsTable.ajax.reload(null, false); }); $('#swh-save-requests-accepted-tab').click(() => { acceptedSaveRequestsTable.ajax.reload(null, false); }); $('#swh-save-requests-rejected-tab').click(() => { rejectedSaveRequestsTable.ajax.reload(null, false); }); $('body').on('click', e => { if ($(e.target).parents('.popover').length > 0) { event.stopPropagation(); } else if ($(e.target).parents('.swh-save-request-info').length === 0) { $('.swh-save-request-info').popover('dispose'); } }); }); } export function addAuthorizedOriginUrl() { let originUrl = $('#swh-authorized-url-prefix').val(); let addOriginUrl = Urls.admin_origin_save_add_authorized_url(originUrl); csrfPost(addOriginUrl) .then(handleFetchError) .then(() => { authorizedOriginTable.row.add({'url': originUrl}).draw(); $('.swh-add-authorized-origin-status').html( 
htmlAlert('success', 'The origin url prefix has been successfully added in the authorized list.', true) ); }) .catch(response => { $('.swh-add-authorized-origin-status').html( htmlAlert('warning', 'The provided origin url prefix is already registered in the authorized list.', true) ); }); } export function removeAuthorizedOriginUrl() { let originUrl = $('#swh-authorized-origin-urls tr.selected').text(); if (originUrl) { let removeOriginUrl = Urls.admin_origin_save_remove_authorized_url(originUrl); csrfPost(removeOriginUrl) .then(handleFetchError) .then(() => { authorizedOriginTable.row('.selected').remove().draw(); }) .catch(() => {}); } } export function addUnauthorizedOriginUrl() { let originUrl = $('#swh-unauthorized-url-prefix').val(); let addOriginUrl = Urls.admin_origin_save_add_unauthorized_url(originUrl); csrfPost(addOriginUrl) .then(handleFetchError) .then(() => { unauthorizedOriginTable.row.add({'url': originUrl}).draw(); $('.swh-add-unauthorized-origin-status').html( htmlAlert('success', 'The origin url prefix has been successfully added in the unauthorized list.', true) ); }) .catch(() => { $('.swh-add-unauthorized-origin-status').html( htmlAlert('warning', 'The provided origin url prefix is already registered in the unauthorized list.', true) ); }); } export function removeUnauthorizedOriginUrl() { let originUrl = $('#swh-unauthorized-origin-urls tr.selected').text(); if (originUrl) { let removeOriginUrl = Urls.admin_origin_save_remove_unauthorized_url(originUrl); csrfPost(removeOriginUrl) .then(handleFetchError) .then(() => { unauthorizedOriginTable.row('.selected').remove().draw(); }) .catch(() => {}); } } export function acceptOriginSaveRequest() { let selectedRow = pendingSaveRequestsTable.row('.selected'); if (selectedRow.length) { let acceptOriginSaveRequestCallback = () => { let rowData = selectedRow.data(); let acceptSaveRequestUrl = Urls.admin_origin_save_request_accept(rowData['visit_type'], rowData['origin_url']); 
csrfPost(acceptSaveRequestUrl) .then(() => { pendingSaveRequestsTable.ajax.reload(null, false); }); }; swh.webapp.showModalConfirm( 'Accept origin save request ?', 'Are you sure to accept this origin save request ?', acceptOriginSaveRequestCallback); } } export function rejectOriginSaveRequest() { let selectedRow = pendingSaveRequestsTable.row('.selected'); if (selectedRow.length) { let rejectOriginSaveRequestCallback = () => { let rowData = selectedRow.data(); let rejectSaveRequestUrl = Urls.admin_origin_save_request_reject(rowData['visit_type'], rowData['origin_url']); csrfPost(rejectSaveRequestUrl) .then(() => { pendingSaveRequestsTable.ajax.reload(null, false); }); }; swh.webapp.showModalConfirm( 'Reject origin save request ?', 'Are you sure to reject this origin save request ?', rejectOriginSaveRequestCallback); } } function removeOriginSaveRequest(requestTable) { let selectedRow = requestTable.row('.selected'); if (selectedRow.length) { let requestId = selectedRow.data()['id']; let removeOriginSaveRequestCallback = () => { let removeSaveRequestUrl = Urls.admin_origin_save_request_remove(requestId); csrfPost(removeSaveRequestUrl) .then(() => { requestTable.ajax.reload(null, false); }); }; swh.webapp.showModalConfirm( 'Remove origin save request ?', 'Are you sure to remove this origin save request ?', removeOriginSaveRequestCallback); } } export function removePendingOriginSaveRequest() { removeOriginSaveRequest(pendingSaveRequestsTable); } export function removeAcceptedOriginSaveRequest() { removeOriginSaveRequest(acceptedSaveRequestsTable); } export function removeRejectedOriginSaveRequest() { removeOriginSaveRequest(rejectedSaveRequestsTable); } - -export function displaySaveRequestInfo(event, saveRequestId) { - event.stopPropagation(); - const saveRequestTaskInfoUrl = Urls.admin_origin_save_task_info(saveRequestId); - $('.swh-save-request-info').popover('dispose'); - $(event.target).popover({ - 'title': 'Save request task information', - 'content': `
-
- -

Fetching task information ...

-
-
`, - 'html': true, - 'placement': 'left', - 'sanitizeFn': swh.webapp.filterXSS - }); - $(event.target).popover('show'); - fetch(saveRequestTaskInfoUrl) - .then(response => response.json()) - .then(saveRequestTaskInfo => { - let content; - if ($.isEmptyObject(saveRequestTaskInfo)) { - content = 'Not available'; - } else { - let saveRequestInfo = []; - saveRequestInfo.push({ - key: 'Task type', - value: saveRequestTaskInfo.type - }); - if (saveRequestTaskInfo.hasOwnProperty('task_name')) { - saveRequestInfo.push({ - key: 'Task name', - value: saveRequestTaskInfo.name - }); - } - saveRequestInfo.push({ - key: 'Task arguments', - value: JSON.stringify(saveRequestTaskInfo.arguments, null, 2) - }); - saveRequestInfo.push({ - key: 'Task id', - value: saveRequestTaskInfo.id - }); - saveRequestInfo.push({ - key: 'Task backend id', - value: saveRequestTaskInfo.backend_id - }); - saveRequestInfo.push({ - key: 'Task scheduling date', - value: new Date(saveRequestTaskInfo.scheduled).toLocaleString() - }); - saveRequestInfo.push({ - key: 'Task termination date', - value: new Date(saveRequestTaskInfo.ended).toLocaleString() - }); - if (saveRequestTaskInfo.hasOwnProperty('duration')) { - saveRequestInfo.push({ - key: 'Task duration', - value: saveRequestTaskInfo.duration + ' s' - }); - } - if (saveRequestTaskInfo.hasOwnProperty('worker')) { - saveRequestInfo.push({ - key: 'Task executor', - value: saveRequestTaskInfo.worker - }); - } - if (saveRequestTaskInfo.hasOwnProperty('message')) { - saveRequestInfo.push({ - key: 'Task log', - value: saveRequestTaskInfo.message - }); - } - content = ''; - for (let info of saveRequestInfo) { - content += - ` - - - `; - } - content += '
'; - } - $('.swh-popover').html(content); - $(event.target).popover('update'); - }); -} diff --git a/swh/web/assets/src/bundles/save/index.js b/swh/web/assets/src/bundles/save/index.js index 8bb21d58..bb739c07 100644 --- a/swh/web/assets/src/bundles/save/index.js +++ b/swh/web/assets/src/bundles/save/index.js @@ -1,300 +1,433 @@ /** * Copyright (C) 2018-2020 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {handleFetchError, csrfPost, isGitRepoUrl, htmlAlert, removeUrlFragment} from 'utils/functions'; import {swhSpinnerSrc} from 'utils/constants'; import {validate} from 'validate.js'; let saveRequestsTable; function originSaveRequest(originType, originUrl, acceptedCallback, pendingCallback, errorCallback) { let addSaveOriginRequestUrl = Urls.origin_save_request(originType, originUrl); let headers = { 'Accept': 'application/json', 'Content-Type': 'application/json' }; $('.swh-processing-save-request').css('display', 'block'); csrfPost(addSaveOriginRequestUrl, headers) .then(handleFetchError) .then(response => response.json()) .then(data => { $('.swh-processing-save-request').css('display', 'none'); if (data.save_request_status === 'accepted') { acceptedCallback(); } else { pendingCallback(); } }) .catch(response => { $('.swh-processing-save-request').css('display', 'none'); response.json().then(errorData => { errorCallback(response.status, errorData); }); }); } export function initOriginSave() { $(document).ready(() => { $.fn.dataTable.ext.errMode = 'none'; fetch(Urls.origin_save_types_list()) .then(response => response.json()) .then(data => { for (let originType of data) { $('#swh-input-visit-type').append(``); } }); saveRequestsTable = $('#swh-origin-save-requests') .on('error.dt', (e, settings, techNote, message) => { $('#swh-origin-save-request-list-error').text('An error 
occurred while retrieving the save requests list'); console.log(message); }) .DataTable({ serverSide: true, processing: true, language: { processing: `` }, ajax: Urls.origin_save_requests_list('all'), searchDelay: 1000, columns: [ { data: 'save_request_date', name: 'request_date', render: (data, type, row) => { if (type === 'display') { let date = new Date(data); return date.toLocaleString(); } return data; } }, { data: 'visit_type', name: 'visit_type' }, { data: 'origin_url', name: 'origin_url', render: (data, type, row) => { if (type === 'display') { let html = ''; const sanitizedURL = $.fn.dataTable.render.text().display(data); if (row.save_task_status === 'succeed') { let browseOriginUrl = `${Urls.browse_origin()}?origin_url=${sanitizedURL}`; browseOriginUrl += `&timestamp=${row.visit_date}`; html += `${sanitizedURL}`; } else { html += sanitizedURL; } html += ` `; return html; } return data; } }, { data: 'save_request_status', name: 'status' }, { data: 'save_task_status', name: 'loading_task_status' + }, + { + name: 'info', + render: (data, type, row) => { + if (row.save_task_status === 'succeed' || row.save_task_status === 'failed') { + return ``; + } else { + return ''; + } + } } ], scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); swh.webapp.addJumpToPagePopoverToDataTable(saveRequestsTable); $('#swh-origin-save-requests-list-tab').on('shown.bs.tab', () => { saveRequestsTable.draw(); window.location.hash = '#requests'; }); - $('#swh-origin-save-request-create-tab').on('shown.bs.tab', () => { + $('#swh-origin-save-request-help-tab').on('shown.bs.tab', () => { removeUrlFragment(); + $('.swh-save-request-info').popover('dispose'); }); let saveRequestAcceptedAlert = htmlAlert( 'success', 'The "save code now" request has been accepted and will be processed as soon as possible.', true ); let saveRequestPendingAlert = htmlAlert( 'warning', 'The "save code now" request has been put in pending state and may be 
accepted for processing after manual review.', true ); let saveRequestRateLimitedAlert = htmlAlert( 'danger', 'The rate limit for "save code now" requests has been reached. Please try again later.', true ); let saveRequestUnknownErrorAlert = htmlAlert( 'danger', 'An unexpected error happened when submitting the "save code now request".', true ); $('#swh-save-origin-form').submit(event => { event.preventDefault(); event.stopPropagation(); $('.alert').alert('close'); if (event.target.checkValidity()) { $(event.target).removeClass('was-validated'); let originType = $('#swh-input-visit-type').val(); let originUrl = $('#swh-input-origin-url').val(); originSaveRequest(originType, originUrl, () => $('#swh-origin-save-request-status').html(saveRequestAcceptedAlert), () => $('#swh-origin-save-request-status').html(saveRequestPendingAlert), (statusCode, errorData) => { $('#swh-origin-save-request-status').css('color', 'red'); if (statusCode === 403) { const errorAlert = htmlAlert('danger', `Error: ${errorData['detail']}`); $('#swh-origin-save-request-status').html(errorAlert); } else if (statusCode === 429) { $('#swh-origin-save-request-status').html(saveRequestRateLimitedAlert); } else { $('#swh-origin-save-request-status').html(saveRequestUnknownErrorAlert); } }); } else { $(event.target).addClass('was-validated'); } }); $('#swh-show-origin-save-requests-list').on('click', (event) => { event.preventDefault(); $('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show'); }); $('#swh-input-origin-url').on('input', function(event) { let originUrl = $(this).val().trim(); $(this).val(originUrl); $('#swh-input-visit-type option').each(function() { let val = $(this).val(); if (val && originUrl.includes(val)) { $(this).prop('selected', true); } }); }); if (window.location.hash === '#requests') { $('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show'); } }); } export function validateSaveOriginUrl(input) { let originUrl = input.value.trim(); let validUrl = 
validate({website: originUrl}, { website: { url: { schemes: ['http', 'https', 'svn', 'git'] } } }) === undefined; let originType = $('#swh-input-visit-type').val(); if (originType === 'git' && validUrl) { // additional checks for well known code hosting providers let githubIdx = originUrl.indexOf('://github.com'); let gitlabIdx = originUrl.indexOf('://gitlab.'); let gitSfIdx = originUrl.indexOf('://git.code.sf.net'); let bitbucketIdx = originUrl.indexOf('://bitbucket.org'); if (githubIdx !== -1 && githubIdx <= 5) { validUrl = isGitRepoUrl(originUrl, 'github.com'); } else if (gitlabIdx !== -1 && gitlabIdx <= 5) { let startIdx = gitlabIdx + 3; let idx = originUrl.indexOf('/', startIdx); if (idx !== -1) { let gitlabDomain = originUrl.substr(startIdx, idx - startIdx); validUrl = isGitRepoUrl(originUrl, gitlabDomain); } else { validUrl = false; } } else if (gitSfIdx !== -1 && gitSfIdx <= 5) { validUrl = isGitRepoUrl(originUrl, 'git.code.sf.net/p'); } else if (bitbucketIdx !== -1 && bitbucketIdx <= 5) { validUrl = isGitRepoUrl(originUrl, 'bitbucket.org'); } } if (validUrl) { input.setCustomValidity(''); } else { input.setCustomValidity('The origin url is not valid or does not reference a code repository'); } } export function initTakeNewSnapshot() { let newSnapshotRequestAcceptedAlert = htmlAlert( 'success', 'The "take new snapshot" request has been accepted and will be processed as soon as possible.', true ); let newSnapshotRequestPendingAlert = htmlAlert( 'warning', 'The "take new snapshot" request has been put in pending state and may be accepted for processing after manual review.', true ); let newSnapshotRequestRateLimitAlert = htmlAlert( 'danger', 'The rate limit for "take new snapshot" requests has been reached. 
Please try again later.', true ); let newSnapshotRequestUnknownErrorAlert = htmlAlert( 'danger', 'An unexpected error happened when submitting the "save code now request".', true ); $(document).ready(() => { $('#swh-take-new-snapshot-form').submit(event => { event.preventDefault(); event.stopPropagation(); let originType = $('#swh-input-visit-type').val(); let originUrl = $('#swh-input-origin-url').val(); originSaveRequest(originType, originUrl, () => $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestAcceptedAlert), () => $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestPendingAlert), (statusCode, errorData) => { $('#swh-take-new-snapshot-request-status').css('color', 'red'); if (statusCode === 403) { const errorAlert = htmlAlert('danger', `Error: ${errorData['detail']}`, true); $('#swh-take-new-snapshot-request-status').html(errorAlert); } else if (statusCode === 429) { $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestRateLimitAlert); } else { $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestUnknownErrorAlert); } }); }); }); } + +export function displaySaveRequestInfo(event, saveRequestId) { + event.stopPropagation(); + const saveRequestTaskInfoUrl = Urls.origin_save_task_info(saveRequestId); + // close popover when clicking again on the info icon + if ($(event.target).data('bs.popover')) { + $(event.target).popover('dispose'); + return; + } + $('.swh-save-request-info').popover('dispose'); + $(event.target).popover({ + animation: false, + boundary: 'viewport', + container: 'body', + title: 'Save request task information ' + + '`, + content: `
+
+ +

Fetching task information ...

+
+
`, + html: true, + placement: 'left', + sanitizeFn: swh.webapp.filterXSS + }); + + $(event.target).on('shown.bs.popover', function() { + const popoverId = $(this).attr('aria-describedby'); + $(`#${popoverId} .mdi-close`).click(() => { + $(this).popover('dispose'); + }); + }); + + $(event.target).popover('show'); + fetch(saveRequestTaskInfoUrl) + .then(response => response.json()) + .then(saveRequestTaskInfo => { + let content; + if ($.isEmptyObject(saveRequestTaskInfo)) { + content = 'Not available'; + } else { + let saveRequestInfo = []; + if (saveRequestTaskInfo.type) { + saveRequestInfo.push({ + key: 'Task type', + value: saveRequestTaskInfo.type + }); + } + if (saveRequestTaskInfo.arguments) { + saveRequestInfo.push({ + key: 'Task arguments', + value: JSON.stringify(saveRequestTaskInfo.arguments, null, 2) + }); + } + if (saveRequestTaskInfo.id) { + saveRequestInfo.push({ + key: 'Task id', + value: saveRequestTaskInfo.id + }); + } + if (saveRequestTaskInfo.backend_id) { + saveRequestInfo.push({ + key: 'Task backend id', + value: saveRequestTaskInfo.backend_id + }); + } + if (saveRequestTaskInfo.scheduled) { + saveRequestInfo.push({ + key: 'Task scheduling date', + value: new Date(saveRequestTaskInfo.scheduled).toLocaleString() + }); + } + if (saveRequestTaskInfo.started) { + saveRequestInfo.push({ + key: 'Task start date', + value: new Date(saveRequestTaskInfo.started).toLocaleString() + }); + } + if (saveRequestTaskInfo.ended) { + saveRequestInfo.push({ + key: 'Task termination date', + value: new Date(saveRequestTaskInfo.ended).toLocaleString() + }); + } + if (saveRequestTaskInfo.duration) { + saveRequestInfo.push({ + key: 'Task duration', + value: saveRequestTaskInfo.duration + ' seconds' + }); + } + if (saveRequestTaskInfo.worker) { + saveRequestInfo.push({ + key: 'Task executor', + value: saveRequestTaskInfo.worker + }); + } + if (saveRequestTaskInfo.message) { + saveRequestInfo.push({ + key: 'Task log', + value: saveRequestTaskInfo.message + }); + } + 
content = ''; + for (let info of saveRequestInfo) { + content += + ` + + + `; + } + content += '
'; + } + $('.swh-popover').html(content); + $(event.target).popover('update'); + }); +} diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py index 28445322..58d4fdf0 100644 --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -1,602 +1,620 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from bisect import bisect_right from datetime import datetime, timezone, timedelta from itertools import product import json import logging +from typing import Any, Dict from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ValidationError from django.core.validators import URLValidator from django.utils.html import escape from prometheus_client import Gauge import requests import sentry_sdk from swh.web import config from swh.web.common import service from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc from swh.web.common.models import ( SaveUnauthorizedOrigin, SaveAuthorizedOrigin, SaveOriginRequest, SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, SAVE_TASK_NOT_CREATED, ) from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import parse_timestamp, SWH_WEB_METRICS_REGISTRY from swh.scheduler.utils import create_oneshot_task_dict scheduler = config.scheduler() logger = logging.getLogger(__name__) def get_origin_save_authorized_urls(): """ Get the list of origin url prefixes authorized to be immediately loaded into the archive (whitelist). 
Returns: list: The list of authorized origin url prefix """ return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls(): """ Get the list of origin url prefixes forbidden to be loaded into the archive (blacklist). Returns: list: the list of unauthorized origin url prefix """ return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url): """ Check if a software origin can be saved into the archive. Based on the origin url, the save request will be either: * immediately accepted if the url is whitelisted * rejected if the url is blacklisted * put in pending state for manual review otherwise Args: origin_url (str): the software origin url to check Returns: str: the origin save request status, either **accepted**, **rejected** or **pending** """ # origin url may be blacklisted for url_prefix in get_origin_save_unauthorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_REJECTED # if the origin url is in the white list, it can be immediately saved for url_prefix in get_origin_save_authorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_ACCEPTED # otherwise, the origin url needs to be manually verified return SAVE_REQUEST_PENDING # map visit type to scheduler task # TODO: do not hardcode the task name here (T1157) _visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"} # map scheduler task status to origin save status _save_task_status = { "next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED, "next_run_scheduled": SAVE_TASK_SCHEDULED, "completed": SAVE_TASK_SUCCEED, "disabled": SAVE_TASK_FAILED, } def get_savable_visit_types(): return sorted(list(_visit_type_task.keys())) def _check_visit_type_savable(visit_type): """ Get the list of visit types that can be performed through a save request. 
Returns: list: the list of saveable visit types """ allowed_visit_types = ", ".join(get_savable_visit_types()) if visit_type not in _visit_type_task: raise BadInputExc( "Visit of type %s can not be saved! " "Allowed types are the following: %s" % (visit_type, allowed_visit_types) ) _validate_url = URLValidator(schemes=["http", "https", "svn", "git"]) def _check_origin_url_valid(origin_url): try: _validate_url(origin_url) except ValidationError: raise BadInputExc( "The provided origin url (%s) is not valid!" % escape(origin_url) ) def _get_visit_info_for_save_request(save_request): visit_date = None visit_status = None time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # stop trying to find a visit date one month after save request submission # as those requests to storage are expensive and associated loading task # surely ended up with errors if time_delta.days <= 30: try: origin = {"url": save_request.origin_url} origin_info = service.lookup_origin(origin) origin_visits = get_origin_visits(origin_info) visit_dates = [parse_timestamp(v["date"]) for v in origin_visits] i = bisect_right(visit_dates, save_request.request_date) if i != len(visit_dates): visit_date = visit_dates[i] visit_status = origin_visits[i]["status"] if origin_visits[i]["status"] == "ongoing": visit_date = None except Exception as exc: sentry_sdk.capture_exception(exc) return visit_date, visit_status def _check_visit_update_status(save_request, save_task_status): visit_date, visit_status = _get_visit_info_for_save_request(save_request) save_request.visit_date = visit_date # visit has been performed, mark the saving task as succeed if visit_date and visit_status is not None: save_task_status = SAVE_TASK_SUCCEED elif visit_status == "ongoing": save_task_status = SAVE_TASK_RUNNING else: time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # consider the task as failed if it is still in scheduled state # 30 days after its 
submission if time_delta.days > 30: save_task_status = SAVE_TASK_FAILED return visit_date, save_task_status def _save_request_dict(save_request, task=None): must_save = False visit_date = save_request.visit_date # save task still in scheduler db if task: save_task_status = _save_task_status[task["status"]] # Consider request from which a visit date has already been found # as succeeded to avoid retrieving it again if save_task_status == SAVE_TASK_SCHEDULED and visit_date: save_task_status = SAVE_TASK_SUCCEED if save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEED) and not visit_date: visit_date, _ = _get_visit_info_for_save_request(save_request) save_request.visit_date = visit_date must_save = True # Check tasks still marked as scheduled / not yet scheduled if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( save_request, save_task_status ) # save task may have been archived else: save_task_status = save_request.loading_task_status if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( save_request, save_task_status ) else: save_task_status = save_request.loading_task_status if save_request.loading_task_status != save_task_status: save_request.loading_task_status = save_task_status must_save = True if must_save: save_request.save() return { "id": save_request.id, "visit_type": save_request.visit_type, "origin_url": save_request.origin_url, "save_request_date": save_request.request_date.isoformat(), "save_request_status": save_request.status, "save_task_status": save_task_status, "visit_date": visit_date.isoformat() if visit_date else None, } def create_save_origin_request(visit_type, origin_url): """ Create a loading task to save a software origin into the archive. This function aims to create a software origin loading task trough the use of the swh-scheduler component. 
First, some checks are performed to see if the visit type and origin url are valid but also if the the save request can be accepted. If those checks passed, the loading task is then created. Otherwise, the save request is put in pending or rejected state. All the submitted save requests are logged into the swh-web database to keep track of them. Args: visit_type (str): the type of visit to perform (currently only ``git`` but ``svn`` and ``hg`` will soon be available) origin_url (str): the url of the origin to save Raises: BadInputExc: the visit type or origin url is invalid ForbiddenExc: the provided origin url is blacklisted Returns: dict: A dict describing the save request with the following keys: * **visit_type**: the type of visit to perform * **origin_url**: the url of the origin * **save_request_date**: the date the request was submitted * **save_request_status**: the request status, either **accepted**, **rejected** or **pending** * **save_task_status**: the origin loading task status, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) save_request_status = can_save_origin(origin_url) task = None # if the origin save request is accepted, create a scheduler # task to load it into the archive if save_request_status == SAVE_REQUEST_ACCEPTED: # create a task with high priority kwargs = { "priority": "high", "url": origin_url, } sor = None # get list of previously sumitted save requests current_sors = list( SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ) ) can_create_task = False # if no save requests previously submitted, create the scheduler task if not current_sors: can_create_task = True else: # get the latest submitted save request sor = current_sors[0] # if it was in pending state, we need to create the scheduler task # and update the save request info in the database if sor.status == SAVE_REQUEST_PENDING: 
can_create_task = True # a task has already been created to load the origin elif sor.loading_task_id != -1: # get the scheduler task and its status tasks = scheduler.get_tasks([sor.loading_task_id]) task = tasks[0] if tasks else None task_status = _save_request_dict(sor, task)["save_task_status"] # create a new scheduler task only if the previous one has been # already executed if task_status == SAVE_TASK_FAILED or task_status == SAVE_TASK_SUCCEED: can_create_task = True sor = None else: can_create_task = False if can_create_task: # effectively create the scheduler task task_dict = create_oneshot_task_dict(_visit_type_task[visit_type], **kwargs) task = scheduler.create_tasks([task_dict])[0] # pending save request has been accepted if sor: sor.status = SAVE_REQUEST_ACCEPTED sor.loading_task_id = task["id"] sor.save() else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, loading_task_id=task["id"], ) # save request must be manually reviewed for acceptation elif save_request_status == SAVE_REQUEST_PENDING: # check if there is already such a save request already submitted, # no need to add it to the database in that case try: sor = SaveOriginRequest.objects.get( visit_type=visit_type, origin_url=origin_url, status=save_request_status ) # if not add it to the database except ObjectDoesNotExist: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status ) # origin can not be saved as its url is blacklisted, # log the request to the database anyway else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status ) if save_request_status == SAVE_REQUEST_REJECTED: raise ForbiddenExc( ( 'The "save code now" request has been rejected ' "because the provided origin url is blacklisted." 
) ) return _save_request_dict(sor, task) def get_save_origin_requests_from_queryset(requests_queryset): """ Get all save requests from a SaveOriginRequest queryset. Args: requests_queryset (django.db.models.QuerySet): input SaveOriginRequest queryset Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ task_ids = [] for sor in requests_queryset: task_ids.append(sor.loading_task_id) save_requests = [] if task_ids: tasks = scheduler.get_tasks(task_ids) tasks = {task["id"]: task for task in tasks} for sor in requests_queryset: sr_dict = _save_request_dict(sor, tasks.get(sor.loading_task_id)) save_requests.append(sr_dict) return save_requests def get_save_origin_requests(visit_type, origin_url): """ Get all save requests for a given software origin. Args: visit_type (str): the type of visit origin_url (str): the url of the origin Raises: BadInputExc: the visit type or origin url is invalid swh.web.common.exc.NotFoundExc: no save requests can be found for the given origin Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) sors = SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ) if sors.count() == 0: raise NotFoundExc( ("No save requests found for visit of type " "%s on origin with url %s.") % (visit_type, origin_url) ) return get_save_origin_requests_from_queryset(sors) -def get_save_origin_task_info(save_request_id): +def get_save_origin_task_info( + save_request_id: int, full_info: bool = True +) -> Dict[str, Any]: """ Get detailed information about an accepted save origin request and its associated loading task. If the associated loading task info is archived and removed from the scheduler database, returns an empty dictionary. 
Args: - save_request_id (int): identifier of a save origin request + save_request_id: identifier of a save origin request + full_info: whether to return detailed info for staff users Returns: - dict: A dictionary with the following keys: + A dictionary with the following keys: - **type**: loading task type - **arguments**: loading task arguments - **id**: loading task database identifier - **backend_id**: loading task celery identifier - **scheduled**: loading task scheduling date - **ended**: loading task termination date - **status**: loading task execution status Depending on the availability of the task logs in the elasticsearch cluster of Software Heritage, the returned dictionary may also contain the following keys: - **name**: associated celery task name - **message**: relevant log message from task execution - **duration**: task execution time (only if it succeeded) - **worker**: name of the worker that executed the task """ try: save_request = SaveOriginRequest.objects.get(id=save_request_id) except ObjectDoesNotExist: return {} task = scheduler.get_tasks([save_request.loading_task_id]) task = task[0] if task else None if task is None: return {} task_run = scheduler.get_task_runs([task["id"]]) task_run = task_run[0] if task_run else None if task_run is None: return {} task_run["type"] = task["type"] task_run["arguments"] = task["arguments"] task_run["id"] = task_run["task"] del task_run["task"] del task_run["metadata"] - del task_run["started"] es_workers_index_url = config.get_config()["es_workers_index_url"] if not es_workers_index_url: return task_run es_workers_index_url += "/_search" if save_request.visit_date: min_ts = save_request.visit_date max_ts = min_ts + timedelta(days=7) else: min_ts = save_request.request_date max_ts = min_ts + timedelta(days=30) - min_ts = int(min_ts.timestamp()) * 1000 - max_ts = int(max_ts.timestamp()) * 1000 + min_ts_unix = int(min_ts.timestamp()) * 1000 + max_ts_unix = int(max_ts.timestamp()) * 1000 save_task_status = 
_save_task_status[task["status"]] priority = "3" if save_task_status == SAVE_TASK_FAILED else "6" query = { "bool": { "must": [ {"match_phrase": {"priority": {"query": priority}}}, {"match_phrase": {"swh_task_id": {"query": task_run["backend_id"]}}}, { "range": { "@timestamp": { - "gte": min_ts, - "lte": max_ts, + "gte": min_ts_unix, + "lte": max_ts_unix, "format": "epoch_millis", } } }, ] } } try: response = requests.post( es_workers_index_url, json={"query": query, "sort": ["@timestamp"]}, timeout=30, ) results = json.loads(response.text) if results["hits"]["total"]["value"] >= 1: task_run_info = results["hits"]["hits"][-1]["_source"] if "swh_logging_args_runtime" in task_run_info: duration = task_run_info["swh_logging_args_runtime"] task_run["duration"] = duration if "message" in task_run_info: task_run["message"] = task_run_info["message"] if "swh_logging_args_name" in task_run_info: task_run["name"] = task_run_info["swh_logging_args_name"] elif "swh_task_name" in task_run_info: task_run["name"] = task_run_info["swh_task_name"] if "hostname" in task_run_info: task_run["worker"] = task_run_info["hostname"] elif "host" in task_run_info: task_run["worker"] = task_run_info["host"] except Exception as exc: logger.warning("Request to Elasticsearch failed\n%s", exc) sentry_sdk.capture_exception(exc) + if not full_info: + for field in ("id", "backend_id", "worker"): + # remove some staff only fields + task_run.pop(field, None) + if "message" in task_run and "Loading failure" in task_run["message"]: + # hide traceback for non staff users, only display exception + message_lines = task_run["message"].split("\n") + message = "" + for line in message_lines: + if line.startswith("Traceback"): + break + message += f"{line}\n" + message += message_lines[-1] + task_run["message"] = message + return task_run SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests" _submitted_save_requests_gauge = Gauge( name=SUBMITTED_SAVE_REQUESTS_METRIC, documentation="Number of 
submitted origin save requests", labelnames=["status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests" _accepted_save_requests_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_METRIC, documentation="Number of accepted origin save requests", labelnames=["load_task_status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) def compute_save_requests_metrics(): """Compute a couple of Prometheus metrics related to origin save requests""" request_statuses = ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, ) load_task_statuses = ( SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, ) visit_types = get_savable_visit_types() labels_set = product(request_statuses, visit_types) for labels in labels_set: _submitted_save_requests_gauge.labels(*labels).set(0) labels_set = product(load_task_statuses, visit_types) for labels in labels_set: _accepted_save_requests_gauge.labels(*labels).set(0) for sor in SaveOriginRequest.objects.all(): if sor.status == SAVE_REQUEST_ACCEPTED: _accepted_save_requests_gauge.labels( load_task_status=sor.loading_task_status, visit_type=sor.visit_type ).inc() _submitted_save_requests_gauge.labels( status=sor.status, visit_type=sor.visit_type ).inc() diff --git a/swh/web/misc/origin_save.py b/swh/web/misc/origin_save.py index 313d02b7..94133945 100644 --- a/swh/web/misc/origin_save.py +++ b/swh/web/misc/origin_save.py @@ -1,117 +1,133 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.conf.urls import url from django.core.paginator import Paginator from django.http import HttpResponse, HttpResponseForbidden, HttpResponseServerError from django.shortcuts import render 
from rest_framework.decorators import api_view, authentication_classes from swh.web.api.throttling import throttle_scope from swh.web.common.exc import ForbiddenExc from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import ( create_save_origin_request, get_savable_visit_types, get_save_origin_requests_from_queryset, + get_save_origin_task_info, ) from swh.web.common.utils import EnforceCSRFAuthentication def _origin_save_view(request): return render( request, "misc/origin-save.html", {"heading": ("Request the saving of a software origin into " "the archive")}, ) @api_view(["POST"]) @authentication_classes((EnforceCSRFAuthentication,)) @throttle_scope("swh_save_origin") def _origin_save_request(request, visit_type, origin_url): """ This view is called through AJAX from the save code now form of swh-web. We use DRF here as we want to rate limit the number of submitted requests per user to avoid being possibly flooded by bots. """ try: response = json.dumps( create_save_origin_request(visit_type, origin_url), separators=(",", ": ") ) return HttpResponse(response, content_type="application/json") except ForbiddenExc as exc: return HttpResponseForbidden( json.dumps({"detail": str(exc)}), content_type="application/json" ) except Exception as exc: return HttpResponseServerError( json.dumps({"detail": str(exc)}), content_type="application/json" ) def _visit_save_types_list(request): visit_types = json.dumps(get_savable_visit_types(), separators=(",", ": ")) return HttpResponse(visit_types, content_type="application/json") def _origin_save_requests_list(request, status): if status != "all": save_requests = SaveOriginRequest.objects.filter(status=status) else: save_requests = SaveOriginRequest.objects.all() table_data = {} table_data["recordsTotal"] = save_requests.count() table_data["draw"] = int(request.GET["draw"]) search_value = request.GET["search[value]"] column_order = request.GET["order[0][column]"] field_order = 
request.GET["columns[%s][name]" % column_order] order_dir = request.GET["order[0][dir]"] if order_dir == "desc": field_order = "-" + field_order save_requests = save_requests.order_by(field_order) length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 save_requests = get_save_origin_requests_from_queryset(save_requests) if search_value: save_requests = [ sr for sr in save_requests if search_value.lower() in sr["save_request_status"].lower() or search_value.lower() in sr["save_task_status"].lower() or search_value.lower() in sr["visit_type"].lower() or search_value.lower() in sr["origin_url"].lower() ] table_data["recordsFiltered"] = len(save_requests) paginator = Paginator(save_requests, length) table_data["data"] = paginator.page(page).object_list table_data_json = json.dumps(table_data, separators=(",", ": ")) return HttpResponse(table_data_json, content_type="application/json") +def _save_origin_task_info(request, save_request_id): + request_info = get_save_origin_task_info( + save_request_id, full_info=request.user.is_staff + ) + for date_field in ("scheduled", "started", "ended"): + if date_field in request_info and request_info[date_field] is not None: + request_info[date_field] = request_info[date_field].isoformat() + return HttpResponse(json.dumps(request_info), content_type="application/json") + + urlpatterns = [ url(r"^save/$", _origin_save_view, name="origin-save"), url( r"^save/(?P.+)/url/(?P.+)/$", _origin_save_request, name="origin-save-request", ), url(r"^save/types/list/$", _visit_save_types_list, name="origin-save-types-list"), url( r"^save/requests/list/(?P.+)/$", _origin_save_requests_list, name="origin-save-requests-list", ), + url( + r"^save/task/info/(?P.+)/", + _save_origin_task_info, + name="origin-save-task-info", + ), ] diff --git a/swh/web/templates/misc/origin-save.html b/swh/web/templates/misc/origin-save.html index cb3bc5bc..ead6ced3 100644 --- a/swh/web/templates/misc/origin-save.html +++ 
b/swh/web/templates/misc/origin-save.html @@ -1,124 +1,125 @@ {% extends "../layout.html" %} {% comment %} Copyright (C) 2018-2019 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load render_bundle from webpack_loader %} {% load static %} {% block title %}{{ heading }} – Software Heritage archive{% endblock %} {% block header %} {% render_bundle 'save' %} {% endblock %} {% block navbar-content %}

Save code now

{% endblock %} {% block content %}

You can contribute to extending the content of the Software Heritage archive by submitting an origin save request. To do so, fill in the required information in the form below:

{% csrf_token %}
The visit type must be specified
The origin url is not valid or does not reference a code repository

A "Save code now" request takes the following parameters:

  • Visit type: the type of version control system the software origin is using. Currently, the supported types are:
  • Origin url: the url of the remote repository for the software origin.
    In order to avoid saving errors from Software Heritage, you should provide the clone/checkout url as given by the provider hosting the software origin.
    It can easily be found in the web interface used to browse the software origin.
    For instance, if you want to save a git origin into the archive, you should check that the command $ git clone <origin_url>
    does not return an error before submitting a request.

Once submitted, your save request can either be:

  • accepted: a visit to the provided origin will then be scheduled by Software Heritage in order to load its content into the archive as soon as possible
  • rejected: the provided origin url is blacklisted and no visit will be scheduled
  • put in pending state: a manual review will then be performed to determine whether the origin can be safely loaded into the archive

Once a save request has been accepted, you can follow its current status in the submitted save requests list.

+
Date Type Url Request Status Info

{% endblock %} \ No newline at end of file diff --git a/swh/web/tests/common/test_origin_save.py b/swh/web/tests/common/test_origin_save.py index a414821b..4e24721a 100644 --- a/swh/web/tests/common/test_origin_save.py +++ b/swh/web/tests/common/test_origin_save.py @@ -1,204 +1,226 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import re from datetime import datetime, timedelta, timezone from functools import partial import pytest import requests from swh.core.pytest_plugin import get_response_cb from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import ( get_save_origin_task_info, get_save_origin_requests, ) from swh.web.common.typing import OriginVisitInfo from swh.web.config import get_config _es_url = "http://esnode1.internal.softwareheritage.org:9200" _es_workers_index_url = "%s/swh_workers-*" % _es_url _origin_url = "https://gitlab.com/inkscape/inkscape" _visit_type = "git" _task_id = 203525448 @pytest.fixture(autouse=True) def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with post method """ cb = partial(get_response_cb, datadir=datadir) requests_mock_datadir.post(re.compile("https?://"), body=cb) return requests_mock_datadir @pytest.mark.django_db def test_get_save_origin_archived_task_info(mocker): _get_save_origin_task_info_test(mocker, task_archived=True) @pytest.mark.django_db -def test_get_save_origin_task_info_with_es(mocker): +def test_get_save_origin_task_full_info_with_es(mocker): _get_save_origin_task_info_test(mocker, es_available=True) +@pytest.mark.django_db +def test_get_save_origin_task_info_with_es(mocker): + _get_save_origin_task_info_test(mocker, es_available=True, full_info=False) + + @pytest.mark.django_db def 
test_get_save_origin_task_info_without_es(mocker): _get_save_origin_task_info_test(mocker, es_available=False) def _mock_scheduler(mocker, task_status="succeed", task_archived=False): mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") task = ( { "arguments": {"args": [], "kwargs": {"repo_url": _origin_url},}, "current_interval": timedelta(days=64), "id": _task_id, "next_run": datetime.now(tz=timezone.utc) + timedelta(days=64), "policy": "oneshot", "priority": "high", "retries_left": 0, "status": "disabled", "type": "load-git", } if not task_archived else None ) - mock_scheduler.get_tasks.return_value = [task] + mock_scheduler.get_tasks.return_value = [dict(task) if task else None] task_run = { "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205", "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5), "id": 654270631, "metadata": {}, "scheduled": datetime.now(tz=timezone.utc), "started": None, "status": task_status, "task": _task_id, } - mock_scheduler.get_task_runs.return_value = [task_run] + mock_scheduler.get_task_runs.return_value = [dict(task_run)] return task, task_run -def _get_save_origin_task_info_test(mocker, task_archived=False, es_available=True): +def _get_save_origin_task_info_test( + mocker, task_archived=False, es_available=True, full_info=True +): swh_web_config = get_config() if es_available: swh_web_config.update({"es_workers_index_url": _es_workers_index_url}) else: swh_web_config.update({"es_workers_index_url": ""}) sor = SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=_visit_type, origin_url="https://gitlab.com/inkscape/inkscape", status="accepted", visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), loading_task_id=_task_id, ) task, task_run = _mock_scheduler(mocker, task_archived=task_archived) es_response = requests.post("%s/_search" % _es_workers_index_url).json() task_exec_data = es_response["hits"]["hits"][-1]["_source"] - sor_task_info = 
get_save_origin_task_info(sor.id) + sor_task_info = get_save_origin_task_info(sor.id, full_info=full_info) expected_result = ( { "type": task["type"], "arguments": task["arguments"], "id": task["id"], "backend_id": task_run["backend_id"], "scheduled": task_run["scheduled"], + "started": task_run["started"], "ended": task_run["ended"], "status": task_run["status"], } if not task_archived else {} ) if es_available and not task_archived: expected_result.update( { "message": task_exec_data["message"], "name": task_exec_data["swh_task_name"], "worker": task_exec_data["hostname"], } ) + if not full_info: + expected_result.pop("id", None) + expected_result.pop("backend_id", None) + expected_result.pop("worker", None) + if "message" in expected_result: + message = "" + message_lines = expected_result["message"].split("\n") + for line in message_lines: + if line.startswith("Traceback"): + break + message += f"{line}\n" + message += message_lines[-1] + expected_result["message"] = message + assert sor_task_info == expected_result @pytest.mark.django_db def test_get_save_origin_requests_find_visit_date(mocker): # create a save request SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=_visit_type, origin_url=_origin_url, status="accepted", visit_date=None, loading_task_id=_task_id, ) # mock scheduler and services _mock_scheduler(mocker) mock_service = mocker.patch("swh.web.common.origin_save.service") mock_service.lookup_origin.return_value = {"url": _origin_url} mock_get_origin_visits = mocker.patch( "swh.web.common.origin_save.get_origin_visits" ) # create a visit for the save request visit_date = datetime.now(tz=timezone.utc).isoformat() visit_info = OriginVisitInfo( date=visit_date, formatted_date="", metadata={}, origin=_origin_url, snapshot="", status="full", type=_visit_type, url="", visit=34, ) mock_get_origin_visits.return_value = [visit_info] # check visit date has been correctly found sors = 
get_save_origin_requests(_visit_type, _origin_url) assert len(sors) == 1 assert sors[0]["visit_date"] == visit_date mock_get_origin_visits.assert_called_once() # check visit is not searched again when it has been found get_save_origin_requests(_visit_type, _origin_url) mock_get_origin_visits.assert_called_once() # check visit date are not searched for save requests older than # one month sor = SaveOriginRequest.objects.create( visit_type=_visit_type, origin_url=_origin_url, status="accepted", loading_task_id=_task_id, visit_date=None, ) sor.request_date = datetime.now(tz=timezone.utc) - timedelta(days=31) sor.save() _mock_scheduler(mocker, task_status="failed") sors = get_save_origin_requests(_visit_type, _origin_url) assert len(sors) == 2 assert sors[0]["visit_date"] is None mock_get_origin_visits.assert_called_once()