diff --git a/assets/src/bundles/admin/origin-save.js b/assets/src/bundles/admin/origin-save.js index 773fbd5d..f7d08220 100644 --- a/assets/src/bundles/admin/origin-save.js +++ b/assets/src/bundles/admin/origin-save.js @@ -1,412 +1,414 @@ /** - * Copyright (C) 2018-2020 The Software Heritage developers + * Copyright (C) 2018-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {handleFetchError, csrfPost, htmlAlert} from 'utils/functions'; import {swhSpinnerSrc} from 'utils/constants'; let authorizedOriginTable; let unauthorizedOriginTable; let pendingSaveRequestsTable; let acceptedSaveRequestsTable; let rejectedSaveRequestsTable; function enableRowSelection(tableSel) { $(`${tableSel} tbody`).on('click', 'tr', function() { if ($(this).hasClass('selected')) { $(this).removeClass('selected'); $(tableSel).closest('.tab-pane').find('.swh-action-need-selection').prop('disabled', true); } else { $(`${tableSel} tr.selected`).removeClass('selected'); $(this).addClass('selected'); $(tableSel).closest('.tab-pane').find('.swh-action-need-selection').prop('disabled', false); } }); } export function initOriginSaveAdmin() { $(document).ready(() => { $.fn.dataTable.ext.errMode = 'throw'; authorizedOriginTable = $('#swh-authorized-origin-urls').DataTable({ serverSide: true, ajax: Urls.admin_origin_save_authorized_urls_list(), columns: [{data: 'url', name: 'url'}], scrollY: '50vh', scrollCollapse: true, info: false }); enableRowSelection('#swh-authorized-origin-urls'); swh.webapp.addJumpToPagePopoverToDataTable(authorizedOriginTable); unauthorizedOriginTable = $('#swh-unauthorized-origin-urls').DataTable({ serverSide: true, ajax: Urls.admin_origin_save_unauthorized_urls_list(), columns: [{data: 'url', name: 'url'}], scrollY: '50vh', scrollCollapse: true, info: false }); enableRowSelection('#swh-unauthorized-origin-urls'); swh.webapp.addJumpToPagePopoverToDataTable(unauthorizedOriginTable); const columnsData = [ { data: 'id', name: 'id', visible: false, searchable: false }, { data: 'save_request_date', name: 'request_date', render: (data, type, row) => { if (type === 'display') { const date = new Date(data); return date.toLocaleString(); } return data; } }, { data: 'visit_type', name: 'visit_type' }, { data: 'origin_url', name: 'origin_url', render: (data, type, row) => { if (type === 'display') { let html = ''; const sanitizedURL = $.fn.dataTable.render.text().display(data); if (row.save_task_status === 'succeeded') { let browseOriginUrl = `${Urls.browse_origin()}?origin_url=${encodeURIComponent(sanitizedURL)}`; if (row.visit_date) { browseOriginUrl += `&timestamp=${encodeURIComponent(row.visit_date)}`; } html += `${sanitizedURL}`; } else { html += sanitizedURL; } html += ` ` + ''; return html; } return data; } } ]; pendingSaveRequestsTable = $('#swh-origin-save-pending-requests').DataTable({ serverSide: true, processing: true, language: { processing: `` }, ajax: Urls.origin_save_requests_list('pending'), searchDelay: 1000, columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); enableRowSelection('#swh-origin-save-pending-requests'); swh.webapp.addJumpToPagePopoverToDataTable(pendingSaveRequestsTable); + columnsData.push({ + name: 'info', + render: (data, type, row) => { + if (row.save_task_status === 'succeeded' || row.save_task_status === 'failed' || + row.note != null) { + return ``; + } else { + return ''; + } + } + }); + rejectedSaveRequestsTable = $('#swh-origin-save-rejected-requests').DataTable({ serverSide: true, processing: true, language: { processing: `` }, ajax: Urls.origin_save_requests_list('rejected'), searchDelay: 1000, columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); enableRowSelection('#swh-origin-save-rejected-requests'); swh.webapp.addJumpToPagePopoverToDataTable(rejectedSaveRequestsTable); - columnsData.push({ + columnsData.splice(columnsData.length - 1, 0, { data: 'save_task_status', name: 'save_task_status' }); - columnsData.push({ - name: 'info', - render: (data, type, row) => { - if (row.save_task_status === 'succeeded' || row.save_task_status === 'failed') { - return '`; - } else { - return ''; - } - } - }); - acceptedSaveRequestsTable = $('#swh-origin-save-accepted-requests').DataTable({ serverSide: true, processing: true, language: { processing: `` }, ajax: Urls.origin_save_requests_list('accepted'), searchDelay: 1000, columns: columnsData, scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); enableRowSelection('#swh-origin-save-accepted-requests'); swh.webapp.addJumpToPagePopoverToDataTable(acceptedSaveRequestsTable); $('#swh-origin-save-requests-nav-item').on('shown.bs.tab', () => { pendingSaveRequestsTable.draw(); }); $('#swh-origin-save-url-filters-nav-item').on('shown.bs.tab', () => { authorizedOriginTable.draw(); }); $('#swh-authorized-origins-tab').on('shown.bs.tab', () => { authorizedOriginTable.draw(); }); $('#swh-unauthorized-origins-tab').on('shown.bs.tab', () => { unauthorizedOriginTable.draw(); }); $('#swh-save-requests-pending-tab').on('shown.bs.tab', () => { pendingSaveRequestsTable.draw(); }); $('#swh-save-requests-accepted-tab').on('shown.bs.tab', () => { acceptedSaveRequestsTable.draw(); }); $('#swh-save-requests-rejected-tab').on('shown.bs.tab', () => { rejectedSaveRequestsTable.draw(); }); $('#swh-save-requests-pending-tab').click(() => { pendingSaveRequestsTable.ajax.reload(null, false); }); $('#swh-save-requests-accepted-tab').click(() => { acceptedSaveRequestsTable.ajax.reload(null, false); }); $('#swh-save-requests-rejected-tab').click(() => { rejectedSaveRequestsTable.ajax.reload(null, false); }); $('body').on('click', e => { if ($(e.target).parents('.popover').length > 0) { e.stopPropagation(); } else if ($(e.target).parents('.swh-save-request-info').length === 0) { $('.swh-save-request-info').popover('dispose'); } }); }); } export async function addAuthorizedOriginUrl() { const originUrl = $('#swh-authorized-url-prefix').val(); const addOriginUrl = Urls.admin_origin_save_add_authorized_url(originUrl); try { const response = await csrfPost(addOriginUrl); handleFetchError(response); authorizedOriginTable.row.add({'url': originUrl}).draw(); $('.swh-add-authorized-origin-status').html( htmlAlert('success', 'The origin url prefix has been successfully added in the authorized list.', true) ); } catch (_) { $('.swh-add-authorized-origin-status').html( htmlAlert('warning', 'The provided origin url prefix is already registered in the authorized list.', true) ); } } export async function removeAuthorizedOriginUrl() { const originUrl = $('#swh-authorized-origin-urls tr.selected').text(); if (originUrl) { const removeOriginUrl = Urls.admin_origin_save_remove_authorized_url(originUrl); try { const response = await csrfPost(removeOriginUrl); handleFetchError(response); authorizedOriginTable.row('.selected').remove().draw(); } catch (_) {} } } export async function addUnauthorizedOriginUrl() { const originUrl = $('#swh-unauthorized-url-prefix').val(); const addOriginUrl = Urls.admin_origin_save_add_unauthorized_url(originUrl); try { const response = await csrfPost(addOriginUrl); handleFetchError(response); unauthorizedOriginTable.row.add({'url': originUrl}).draw(); $('.swh-add-unauthorized-origin-status').html( htmlAlert('success', 'The origin url prefix has been successfully added in the unauthorized list.', true) ); } catch (_) { $('.swh-add-unauthorized-origin-status').html( htmlAlert('warning', 'The provided origin url prefix is already registered in the unauthorized list.', true) ); } } export async function removeUnauthorizedOriginUrl() { const originUrl = $('#swh-unauthorized-origin-urls tr.selected').text(); if (originUrl) { const removeOriginUrl = Urls.admin_origin_save_remove_unauthorized_url(originUrl); try { const response = await csrfPost(removeOriginUrl); handleFetchError(response); unauthorizedOriginTable.row('.selected').remove().draw(); } catch (_) {}; } } export function acceptOriginSaveRequest() { const selectedRow = pendingSaveRequestsTable.row('.selected'); if (selectedRow.length) { const acceptOriginSaveRequestCallback = async() => { const rowData = selectedRow.data(); const acceptSaveRequestUrl = Urls.admin_origin_save_request_accept(rowData['visit_type'], rowData['origin_url']); await csrfPost(acceptSaveRequestUrl); pendingSaveRequestsTable.ajax.reload(null, false); }; swh.webapp.showModalConfirm( 'Accept origin save request ?', 'Are you sure to accept this origin save request ?', acceptOriginSaveRequestCallback); } } const rejectModalHtml = `
`; export function rejectOriginSaveRequest() { const selectedRow = pendingSaveRequestsTable.row('.selected'); const rowData = selectedRow.data(); if (selectedRow.length) { const rejectOriginSaveRequestCallback = async() => { $('#swh-web-modal-html').modal('hide'); const rejectSaveRequestUrl = Urls.admin_origin_save_request_reject( rowData['visit_type'], rowData['origin_url']); await csrfPost(rejectSaveRequestUrl, {}, JSON.stringify({note: $('#swh-rejection-text').val()})); pendingSaveRequestsTable.ajax.reload(null, false); }; let currentRejectionReason = 'custom'; const rejectionTexts = {}; swh.webapp.showModalHtml('Reject origin save request ?', rejectModalHtml); $('#swh-rejection-reason').on('change', (event) => { // backup current textarea value rejectionTexts[currentRejectionReason] = $('#swh-rejection-text').val(); currentRejectionReason = event.target.value; let newRejectionText = ''; if (rejectionTexts.hasOwnProperty(currentRejectionReason)) { // restore previous textarea value newRejectionText = rejectionTexts[currentRejectionReason]; } else { // fill textarea with default text according to rejection type if (currentRejectionReason === 'invalid-origin') { newRejectionText = `The origin with URL ${rowData['origin_url']} is not ` + `a link to a ${rowData['visit_type']} repository.`; } else if (currentRejectionReason === 'invalid-origin-type') { newRejectionText = `The origin with URL ${rowData['origin_url']} is not ` + `of type ${rowData['visit_type']}.`; } else if (currentRejectionReason === 'origin-not-found') { newRejectionText = `The origin with URL ${rowData['origin_url']} cannot be found.`; } } $('#swh-rejection-text').val(newRejectionText); }); $('#swh-rejection-form').on('submit', (event) => { event.preventDefault(); event.stopPropagation(); // ensure confirmation modal will be displayed above the html modal $('#swh-web-modal-html').css('z-index', 4000); swh.webapp.showModalConfirm( 'Reject origin save request ?', 'Are you sure to reject this origin save request ?', rejectOriginSaveRequestCallback); }); } } function removeOriginSaveRequest(requestTable) { const selectedRow = requestTable.row('.selected'); if (selectedRow.length) { const requestId = selectedRow.data()['id']; const removeOriginSaveRequestCallback = async() => { const removeSaveRequestUrl = Urls.admin_origin_save_request_remove(requestId); await csrfPost(removeSaveRequestUrl); requestTable.ajax.reload(null, false); }; swh.webapp.showModalConfirm( 'Remove origin save request ?', 'Are you sure to remove this origin save request ?', removeOriginSaveRequestCallback); } } export function removePendingOriginSaveRequest() { removeOriginSaveRequest(pendingSaveRequestsTable); } export function removeAcceptedOriginSaveRequest() { removeOriginSaveRequest(acceptedSaveRequestsTable); } export function removeRejectedOriginSaveRequest() { removeOriginSaveRequest(rejectedSaveRequestsTable); } diff --git a/assets/src/bundles/save/index.js b/assets/src/bundles/save/index.js index 1a893b32..8a6681cd 100644 --- a/assets/src/bundles/save/index.js +++ b/assets/src/bundles/save/index.js @@ -1,561 +1,565 @@ /** * Copyright (C) 2018-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {csrfPost, handleFetchError, isGitRepoUrl, htmlAlert, removeUrlFragment, getCanonicalOriginURL} from 'utils/functions'; import {swhSpinnerSrc} from 'utils/constants'; import artifactFormRowTemplate from './artifact-form-row.ejs'; let saveRequestsTable; async function originSaveRequest( originType, originUrl, extraData, acceptedCallback, pendingCallback, errorCallback ) { // Actually trigger the origin save request const addSaveOriginRequestUrl = Urls.api_1_save_origin(originType, originUrl); $('.swh-processing-save-request').css('display', 'block'); let headers = {}; let body = null; if (extraData !== {}) { body = JSON.stringify(extraData); headers = { 'Content-Type': 'application/json' }; }; try { const response = await csrfPost(addSaveOriginRequestUrl, headers, body); handleFetchError(response); const data = await response.json(); $('.swh-processing-save-request').css('display', 'none'); if (data.save_request_status === 'accepted') { acceptedCallback(); } else { pendingCallback(); } } catch (response) { $('.swh-processing-save-request').css('display', 'none'); const errorData = await response.json(); errorCallback(response.status, errorData); }; } function addArtifactVersionAutofillHandler(formId) { // autofill artifact version input with the filename from // the artifact url without extensions $(`#swh-input-artifact-url-${formId}`).on('input', function(event) { const artifactUrl = $(this).val().trim(); let filename = artifactUrl.split('/').slice(-1)[0]; if (filename !== artifactUrl) { filename = filename.replace(/tar.*$/, 'tar'); const filenameNoExt = filename.split('.').slice(0, -1).join('.'); const artifactVersion = $(`#swh-input-artifact-version-${formId}`); if (filenameNoExt !== filename) { artifactVersion.val(filenameNoExt); } } }); } export function maybeRequireExtraInputs() { // Read the actual selected value and depending on the origin type, display some extra // inputs or hide them. This makes the extra inputs disabled when not displayed. const originType = $('#swh-input-visit-type').val(); let display = 'none'; let disabled = true; if (originType === 'archives') { display = 'flex'; disabled = false; } $('.swh-save-origin-archives-form').css('display', display); if (!disabled) { // help paragraph must have block display for proper rendering $('#swh-save-origin-archives-help').css('display', 'block'); } $('.swh-save-origin-archives-form .form-control').prop('disabled', disabled); if (originType === 'archives' && $('.swh-save-origin-archives-form').length === 1) { // insert first artifact row when the archives visit type is selected for the first time $('.swh-save-origin-archives-form').last().after( artifactFormRowTemplate({deletableRow: false, formId: 0})); addArtifactVersionAutofillHandler(0); } } export function addArtifactFormRow() { const formId = $('.swh-save-origin-artifact-form').length; $('.swh-save-origin-artifact-form').last().after( artifactFormRowTemplate({ deletableRow: true, formId: formId }) ); addArtifactVersionAutofillHandler(formId); } export function deleteArtifactFormRow(event) { $(event.target).closest('.swh-save-origin-artifact-form').remove(); } const userRequestsFilterCheckbox = `
`; export function initOriginSave() { $(document).ready(() => { $.fn.dataTable.ext.errMode = 'none'; // set git as the default value as before $('#swh-input-visit-type').val('git'); saveRequestsTable = $('#swh-origin-save-requests') .on('error.dt', (e, settings, techNote, message) => { $('#swh-origin-save-request-list-error').text('An error occurred while retrieving the save requests list'); console.log(message); }) .DataTable({ serverSide: true, processing: true, language: { processing: `` }, ajax: { url: Urls.origin_save_requests_list('all'), data: (d) => { if (swh.webapp.isUserLoggedIn() && $('#swh-save-requests-user-filter').prop('checked')) { d.user_requests_only = '1'; } } }, searchDelay: 1000, // see https://datatables.net/examples/advanced_init/dom_toolbar.html and the comments section // this option customizes datatables UI components by adding an extra checkbox above the table // while keeping bootstrap layout dom: '<"row"<"col-sm-3"l><"col-sm-6 text-left user-requests-filter"><"col-sm-3"f>>' + '<"row"<"col-sm-12"tr>>' + '<"row"<"col-sm-5"i><"col-sm-7"p>>', fnInitComplete: function() { if (swh.webapp.isUserLoggedIn()) { $('div.user-requests-filter').html(userRequestsFilterCheckbox); $('#swh-save-requests-user-filter').on('change', () => { saveRequestsTable.draw(); }); } }, columns: [ { data: 'save_request_date', name: 'request_date', render: (data, type, row) => { if (type === 'display') { const date = new Date(data); return date.toLocaleString(); } return data; } }, { data: 'visit_type', name: 'visit_type' }, { data: 'origin_url', name: 'origin_url', render: (data, type, row) => { if (type === 'display') { let html = ''; const sanitizedURL = $.fn.dataTable.render.text().display(data); if (row.save_task_status === 'succeeded') { let browseOriginUrl = `${Urls.browse_origin()}?origin_url=${encodeURIComponent(sanitizedURL)}`; if (row.visit_date) { browseOriginUrl += `&timestamp=${encodeURIComponent(row.visit_date)}`; } html += `${sanitizedURL}`; } else { html += sanitizedURL; } html += ` ` + ''; return html; } return data; } }, { data: 'save_request_status', name: 'status' }, { data: 'save_task_status', name: 'loading_task_status' }, { name: 'info', render: (data, type, row) => { - if (row.save_task_status === 'succeeded' || row.save_task_status === 'failed') { - return ``; + if (row.save_task_status === 'succeeded' || row.save_task_status === 'failed' || + row.note != null) { + return ``; } else { return ''; } } }, { render: (data, type, row) => { if (row.save_request_status === 'accepted') { const saveAgainButton = ''; return saveAgainButton; } else { return ''; } } } ], scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); swh.webapp.addJumpToPagePopoverToDataTable(saveRequestsTable); $('#swh-origin-save-requests-list-tab').on('shown.bs.tab', () => { saveRequestsTable.draw(); window.location.hash = '#requests'; }); $('#swh-origin-save-request-help-tab').on('shown.bs.tab', () => { removeUrlFragment(); $('.swh-save-request-info').popover('dispose'); }); const saveRequestAcceptedAlert = htmlAlert( 'success', 'The "save code now" request has been accepted and will be processed as soon as possible.', true ); const saveRequestPendingAlert = htmlAlert( 'warning', 'The "save code now" request has been put in pending state and may be accepted for processing after manual review.', true ); const saveRequestRateLimitedAlert = htmlAlert( 'danger', 'The rate limit for "save code now" requests has been reached. Please try again later.', true ); const saveRequestUnknownErrorAlert = htmlAlert( 'danger', 'An unexpected error happened when submitting the "save code now request".', true ); $('#swh-save-origin-form').submit(async event => { event.preventDefault(); event.stopPropagation(); $('.alert').alert('close'); if (event.target.checkValidity()) { $(event.target).removeClass('was-validated'); const originType = $('#swh-input-visit-type').val(); let originUrl = $('#swh-input-origin-url').val(); originUrl = await getCanonicalOriginURL(originUrl); // read the extra inputs for the 'archives' type const extraData = {}; if (originType === 'archives') { extraData['archives_data'] = []; for (let i = 0; i < $('.swh-save-origin-artifact-form').length; ++i) { extraData['archives_data'].push({ 'artifact_url': $(`#swh-input-artifact-url-${i}`).val(), 'artifact_version': $(`#swh-input-artifact-version-${i}`).val() }); } } originSaveRequest(originType, originUrl, extraData, () => $('#swh-origin-save-request-status').html(saveRequestAcceptedAlert), () => $('#swh-origin-save-request-status').html(saveRequestPendingAlert), (statusCode, errorData) => { $('#swh-origin-save-request-status').css('color', 'red'); if (statusCode === 403) { const errorAlert = htmlAlert('danger', `Error: ${errorData['reason']}`); $('#swh-origin-save-request-status').html(errorAlert); } else if (statusCode === 429) { $('#swh-origin-save-request-status').html(saveRequestRateLimitedAlert); } else if (statusCode === 400) { const errorAlert = htmlAlert('danger', errorData['reason']); $('#swh-origin-save-request-status').html(errorAlert); } else { $('#swh-origin-save-request-status').html(saveRequestUnknownErrorAlert); } }); } else { $(event.target).addClass('was-validated'); } }); $('#swh-show-origin-save-requests-list').on('click', (event) => { event.preventDefault(); $('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show'); }); $('#swh-input-origin-url').on('input', function(event) { const originUrl = $(this).val().trim(); $(this).val(originUrl); $('#swh-input-visit-type option').each(function() { const val = $(this).val(); if (val && originUrl.includes(val)) { $(this).prop('selected', true); } }); }); if (window.location.hash === '#requests') { $('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show'); } }); } export function validateSaveOriginUrl(input) { const originType = $('#swh-input-visit-type').val(); let originUrl = null; let validUrl = true; try { originUrl = new URL(input.value.trim()); } catch (TypeError) { validUrl = false; } if (validUrl) { const allowedProtocols = ['http:', 'https:', 'svn:', 'git:', 'rsync:', 'pserver:', 'ssh:']; validUrl = ( allowedProtocols.find(protocol => protocol === originUrl.protocol) !== undefined ); } if (validUrl && originType === 'git') { // additional checks for well known code hosting providers switch (originUrl.hostname) { case 'github.com': validUrl = isGitRepoUrl(originUrl); break; case 'git.code.sf.net': validUrl = isGitRepoUrl(originUrl, '/p/'); break; case 'bitbucket.org': validUrl = isGitRepoUrl(originUrl); break; default: if (originUrl.hostname.startsWith('gitlab.')) { validUrl = isGitRepoUrl(originUrl); } break; } } if (validUrl) { input.setCustomValidity(''); } else { input.setCustomValidity('The origin url is not valid or does not reference a code repository'); } } export function initTakeNewSnapshot() { const newSnapshotRequestAcceptedAlert = htmlAlert( 'success', 'The "take new snapshot" request has been accepted and will be processed as soon as possible.', true ); const newSnapshotRequestPendingAlert = htmlAlert( 'warning', 'The "take new snapshot" request has been put in pending state and may be accepted for processing after manual review.', true ); const newSnapshotRequestRateLimitAlert = htmlAlert( 'danger', 'The rate limit for "take new snapshot" requests has been reached. Please try again later.', true ); const newSnapshotRequestUnknownErrorAlert = htmlAlert( 'danger', 'An unexpected error happened when submitting the "save code now request".', true ); $(document).ready(() => { $('#swh-take-new-snapshot-form').submit(event => { event.preventDefault(); event.stopPropagation(); const originType = $('#swh-input-visit-type').val(); const originUrl = $('#swh-input-origin-url').val(); const extraData = {}; originSaveRequest(originType, originUrl, extraData, () => $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestAcceptedAlert), () => $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestPendingAlert), (statusCode, errorData) => { $('#swh-take-new-snapshot-request-status').css('color', 'red'); if (statusCode === 403) { const errorAlert = htmlAlert('danger', `Error: ${errorData['detail']}`, true); $('#swh-take-new-snapshot-request-status').html(errorAlert); } else if (statusCode === 429) { $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestRateLimitAlert); } else { $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestUnknownErrorAlert); } }); }); }); } export function formatValuePerType(type, value) { // Given some typed value, format and return accordingly formatted value const mapFormatPerTypeFn = { 'json': (v) => JSON.stringify(v, null, 2), 'date': (v) => new Date(v).toLocaleString(), 'raw': (v) => v, 'duration': (v) => v + ' seconds' }; return value === null ? null : mapFormatPerTypeFn[type](value); } export async function displaySaveRequestInfo(event, saveRequestId) { event.stopPropagation(); const saveRequestTaskInfoUrl = Urls.origin_save_task_info(saveRequestId); // close popover when clicking again on the info icon if ($(event.target).data('bs.popover')) { $(event.target).popover('dispose'); return; } $('.swh-save-request-info').popover('dispose'); $(event.target).popover({ animation: false, boundary: 'viewport', container: 'body', title: 'Save request task information ' + '`, content: `

Fetching task information ...

`, html: true, placement: 'left', sanitizeFn: swh.webapp.filterXSS }); $(event.target).on('shown.bs.popover', function() { const popoverId = $(this).attr('aria-describedby'); $(`#${popoverId} .mdi-close`).click(() => { $(this).popover('dispose'); }); }); $(event.target).popover('show'); const response = await fetch(saveRequestTaskInfoUrl); const saveRequestTaskInfo = await response.json(); let content; if ($.isEmptyObject(saveRequestTaskInfo)) { content = 'Not available'; + + } else if (saveRequestTaskInfo.note != null) { + content = saveRequestTaskInfo.note; } else { const saveRequestInfo = []; const taskData = { 'Type': ['raw', 'type'], 'Visit status': ['raw', 'visit_status'], 'Arguments': ['json', 'arguments'], 'Id': ['raw', 'id'], 'Backend id': ['raw', 'backend_id'], 'Scheduling date': ['date', 'scheduled'], 'Start date': ['date', 'started'], 'Completion date': ['date', 'ended'], 'Duration': ['duration', 'duration'], 'Runner': ['raw', 'worker'], 'Log': ['raw', 'message'] }; for (const [title, [type, property]] of Object.entries(taskData)) { if (saveRequestTaskInfo.hasOwnProperty(property)) { saveRequestInfo.push({ key: title, value: formatValuePerType(type, saveRequestTaskInfo[property]) }); } } content = ''; for (const info of saveRequestInfo) { content += ``; } content += '
'; } $('.swh-popover').html(content); $(event.target).popover('update'); } export function fillSaveRequestFormAndScroll(visitType, originUrl) { $('#swh-input-origin-url').val(originUrl); let originTypeFound = false; $('#swh-input-visit-type option').each(function() { const val = $(this).val(); if (val && originUrl.includes(val)) { $(this).prop('selected', true); originTypeFound = true; } }); if (!originTypeFound) { $('#swh-input-visit-type option').each(function() { const val = $(this).val(); if (val === visitType) { $(this).prop('selected', true); } }); } window.scrollTo(0, 0); } diff --git a/cypress/integration/admin.spec.js b/cypress/integration/admin.spec.js index 33ccf286..321d38ec 100644 --- a/cypress/integration/admin.spec.js +++ b/cypress/integration/admin.spec.js @@ -1,281 +1,295 @@ /** * Copyright (C) 2019-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ const $ = Cypress.$; const defaultRedirect = '/admin/origin/save/'; let url; function logout() { cy.contains('a', 'logout') .click(); } describe('Test Admin Login/logout', function() { before(function() { url = this.Urls.admin(); }); it('should redirect to default page', function() { cy.visit(url) .get('input[name="username"]') .type('admin') .get('input[name="password"]') .type('admin') .get('.container form') .submit(); cy.location('pathname') .should('be.equal', defaultRedirect); logout(); }); it('should display admin-origin-save and deposit in sidebar', function() { cy.adminLogin(); cy.visit(url); cy.get(`.sidebar a[href="${this.Urls.admin_origin_save()}"]`) .should('be.visible'); cy.get(`.sidebar a[href="${this.Urls.admin_deposit()}"]`) .should('be.visible'); logout(); }); it('should display username on top-right', function() { cy.adminLogin(); cy.visit(url); cy.get('.swh-position-right') .should('contain', 'admin'); logout(); }); it('should get info about a user logged in from javascript', function() { cy.window().then(win => { expect(win.swh.webapp.isUserLoggedIn()).to.be.false; }); cy.adminLogin(); cy.visit(url); cy.window().then(win => { expect(win.swh.webapp.isUserLoggedIn()).to.be.true; }); logout(); cy.visit(url); cy.window().then(win => { expect(win.swh.webapp.isUserLoggedIn()).to.be.false; }); }); it('should prevent unauthorized access after logout', function() { cy.visit(this.Urls.admin_origin_save()) .location('pathname') .should('be.equal', '/admin/login/'); cy.visit(this.Urls.admin_deposit()) .location('pathname') .should('be.equal', '/admin/login/'); }); it('should redirect to correct page after login', function() { // mock calls to deposit list api to avoid possible errors // while running the test cy.intercept(`${this.Urls.admin_deposit_list()}**`, { body: { data: [], recordsTotal: 0, recordsFiltered: 0, draw: 1 } }); cy.visit(this.Urls.admin_deposit()) .location('search') .should('contain', `next=${this.Urls.admin_deposit()}`); cy.adminLogin(); cy.visit(this.Urls.admin_deposit()); cy.location('pathname') .should('be.equal', this.Urls.admin_deposit()); logout(); }); }); const existingRowToSelect = 'https://bitbucket.org/'; const originUrlListTestData = [ { listType: 'authorized', originToAdd: 'git://git.archlinux.org/', originToRemove: 'https://github.com/' }, { listType: 'unauthorized', originToAdd: 'https://random.org', originToRemove: 'https://gitlab.com' } ]; const capitalize = s => s.charAt(0).toUpperCase() + s.slice(1); describe('Test Admin Origin Save Urls Filtering', function() { beforeEach(function() { cy.adminLogin(); cy.visit(this.Urls.admin_origin_save()); cy.contains('a', 'Origin urls filtering') .click() .wait(500); }); it(`should select or unselect a table row by clicking on it`, function() { cy.contains(`#swh-authorized-origin-urls tr`, existingRowToSelect) .click() .should('have.class', 'selected') .click() .should('not.have.class', 'selected'); }); originUrlListTestData.forEach(testData => { it(`should add a new origin url prefix in the ${testData.listType} list`, function() { const tabName = capitalize(testData.listType) + ' urls'; cy.contains('a', tabName) .click() .wait(500); cy.get(`#swh-${testData.listType}-origin-urls tr`).each(elt => { if ($(elt).text() === testData.originToAdd) { cy.get(elt).click(); cy.get(`#swh-remove-${testData.listType}-origin-url`).click(); } }); cy.get(`#swh-${testData.listType}-url-prefix`) .type(testData.originToAdd); cy.get(`#swh-add-${testData.listType}-origin-url`) .click(); cy.contains(`#swh-${testData.listType}-origin-urls tr`, testData.originToAdd) .should('be.visible'); cy.contains('.alert-success', `The origin url prefix has been successfully added in the ${testData.listType} list.`) .should('be.visible'); cy.get(`#swh-add-${testData.listType}-origin-url`) .click(); cy.contains('.alert-warning', `The provided origin url prefix is already registered in the ${testData.listType} list.`) .should('be.visible'); }); it(`should remove an origin url prefix from the ${testData.listType} list`, function() { const tabName = capitalize(testData.listType) + ' urls'; cy.contains('a', tabName) .click(); let originUrlMissing = true; cy.get(`#swh-${testData.listType}-origin-urls tr`).each(elt => { if ($(elt).text() === testData.originToRemove) { originUrlMissing = false; } }); if (originUrlMissing) { cy.get(`#swh-${testData.listType}-url-prefix`) .type(testData.originToRemove); cy.get(`#swh-add-${testData.listType}-origin-url`) .click(); cy.get('.alert-dismissible button').click(); } cy.contains(`#swh-${testData.listType}-origin-urls tr`, testData.originToRemove) .click(); cy.get(`#swh-remove-${testData.listType}-origin-url`).click(); cy.contains(`#swh-${testData.listType}-origin-urls tr`, testData.originToRemove) .should('not.exist'); }); }); }); describe('Test Admin Origin Save', function() { it(`should reject a save code now request with note`, function() { const originUrl = `https://example.org/${Date.now()}`; const rejectionNote = 'The provided URL does not target a git repository.'; // anonymous user creates a request put in pending state cy.visit(this.Urls.origin_save()); cy.get('#swh-input-origin-url') .type(originUrl); cy.get('#swh-input-origin-save-submit') .click(); // admin user logs in and visits save code now admin page cy.adminLogin(); cy.visit(this.Urls.admin_origin_save()); // admin rejects the save request and adds a rejection note cy.contains('#swh-origin-save-pending-requests', originUrl) .click(); cy.get('#swh-reject-save-origin-request') .click(); cy.get('#swh-rejection-text') .then(textarea => { textarea.val(rejectionNote); }); cy.get('#swh-rejection-submit') .click(); cy.get('#swh-web-modal-confirm-ok-btn') .click(); // checks rejection note has been saved to swh-web database cy.request(this.Urls.api_1_save_origin('git', originUrl)) .then(response => { expect(response.body[0]['note']).to.equal(rejectionNote); }); + // check rejection note is displayed by clicking on the info icon + // in requests table from public save code now page + cy.visit(this.Urls.origin_save()); + cy.get('#swh-origin-save-requests-list-tab') + .click(); + + cy.contains('#swh-origin-save-requests tr', originUrl); + cy.get('.swh-save-request-info') + .eq(0) + .click(); + + cy.get('.popover-body') + .should('have.text', rejectionNote); + // remove rejected request from swh-web database to avoid side effects // in tests located in origin-save.spec.js cy.visit(this.Urls.admin_origin_save()); cy.get('#swh-save-requests-rejected-tab') .click(); cy.contains('#swh-origin-save-rejected-requests', originUrl) .click(); cy.get('#swh-remove-rejected-save-origin-request') .click(); cy.get('#swh-web-modal-confirm-ok-btn') .click(); }); }); diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py index d762b6ce..3749d134 100644 --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -1,924 +1,935 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from bisect import bisect_right from datetime import datetime, timedelta, timezone from functools import lru_cache from itertools import product import json import logging from typing import Any, Dict, List, Optional, Tuple from prometheus_client import Gauge import requests import sentry_sdk from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator from django.db.models import Q, QuerySet from django.utils.html import escape from swh.scheduler.utils import create_oneshot_task_dict from swh.web.common import archive from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc from swh.web.common.models import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_FAILED, SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_RUNNING, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING, SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) from swh.web.common.origin_visits import get_origin_visits from swh.web.common.typing import ( OriginExistenceCheckInfo, OriginInfo, SaveOriginRequestInfo, ) from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc from swh.web.config import get_config, scheduler logger = logging.getLogger(__name__) # Number of days in the past to lookup for information MAX_THRESHOLD_DAYS = 30 # Non terminal visit statuses which needs updates NON_TERMINAL_STATUSES = [ VISIT_STATUS_CREATED, VISIT_STATUS_ONGOING, ] def get_origin_save_authorized_urls() -> List[str]: """ Get the list of origin url prefixes authorized to be immediately loaded into the archive (whitelist). Returns: list: The list of authorized origin url prefix """ return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls() -> List[str]: """ Get the list of origin url prefixes forbidden to be loaded into the archive (blacklist). Returns: list: the list of unauthorized origin url prefix """ return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url: str, bypass_pending_review: bool = False) -> str: """ Check if a software origin can be saved into the archive. Based on the origin url, the save request will be either: * immediately accepted if the url is whitelisted * rejected if the url is blacklisted * put in pending state for manual review otherwise Args: origin_url (str): the software origin url to check Returns: str: the origin save request status, either **accepted**, **rejected** or **pending** """ # origin url may be blacklisted for url_prefix in get_origin_save_unauthorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_REJECTED # if the origin url is in the white list, it can be immediately saved for url_prefix in get_origin_save_authorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_ACCEPTED # otherwise, the origin url needs to be manually verified if the user # that submitted it does not have special permission if bypass_pending_review: # mark the origin URL as trusted in that case SaveAuthorizedOrigin.objects.get_or_create(url=origin_url) return SAVE_REQUEST_ACCEPTED else: return SAVE_REQUEST_PENDING # map visit type to scheduler task # TODO: do not hardcode the task name here (T1157) _visit_type_task = { "git": "load-git", "hg": "load-hg", "svn": "load-svn", "cvs": "load-cvs", } _visit_type_task_privileged = { "archives": "load-archive-files", } # map scheduler task status to origin save status _save_task_status = { "next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED, "next_run_scheduled": SAVE_TASK_SCHEDULED, "completed": SAVE_TASK_SUCCEEDED, "disabled": SAVE_TASK_FAILED, } # map scheduler task_run status to origin save status _save_task_run_status = { "scheduled": SAVE_TASK_SCHEDULED, "started": SAVE_TASK_RUNNING, "eventful": SAVE_TASK_SUCCEEDED, "uneventful": SAVE_TASK_SUCCEEDED, "failed": SAVE_TASK_FAILED, "permfailed": SAVE_TASK_FAILED, "lost": SAVE_TASK_FAILED, } @lru_cache() def get_scheduler_load_task_types() -> List[str]: task_types = scheduler().get_task_types() return [t["type"] for t in task_types if t["type"].startswith("load")] def get_savable_visit_types_dict(privileged_user: bool = False) -> Dict: """Returned the supported task types the user has access to. Args: privileged_user: Flag to determine if all visit types should be returned or not. Default to False to only list unprivileged visit types. Returns: the dict of supported visit types for the user """ if privileged_user: task_types = {**_visit_type_task, **_visit_type_task_privileged} else: task_types = _visit_type_task # filter visit types according to scheduler load task types if available try: load_task_types = get_scheduler_load_task_types() return {k: v for k, v in task_types.items() if v in load_task_types} except Exception: return task_types def get_savable_visit_types(privileged_user: bool = False) -> List[str]: """Return the list of visit types the user can perform save requests on. Args: privileged_user: Flag to determine if all visit types should be returned or not. Default to False to only list unprivileged visit types. Returns: the list of saveable visit types """ return sorted(list(get_savable_visit_types_dict(privileged_user).keys())) def _check_visit_type_savable(visit_type: str, privileged_user: bool = False) -> None: visit_type_tasks = get_savable_visit_types(privileged_user) if visit_type not in visit_type_tasks: allowed_visit_types = ", ".join(visit_type_tasks) raise BadInputExc( f"Visit of type {visit_type} can not be saved! " f"Allowed types are the following: {allowed_visit_types}" ) _validate_url = URLValidator( schemes=["http", "https", "svn", "git", "rsync", "pserver", "ssh"] ) def _check_origin_url_valid(origin_url: str) -> None: try: _validate_url(origin_url) except ValidationError: raise BadInputExc( "The provided origin url (%s) is not valid!" % escape(origin_url) ) def origin_exists(origin_url: str) -> OriginExistenceCheckInfo: """Check the origin url for existence. If it exists, extract some more useful information on the origin. """ resp = requests.head(origin_url, allow_redirects=True) exists = resp.ok content_length: Optional[int] = None last_modified: Optional[str] = None if exists: # Also process X-Archive-Orig-* headers in case the URL targets the # Internet Archive. size_ = resp.headers.get( "Content-Length", resp.headers.get("X-Archive-Orig-Content-Length") ) content_length = int(size_) if size_ else None try: date_str = resp.headers.get( "Last-Modified", resp.headers.get("X-Archive-Orig-Last-Modified", "") ) date = datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %Z") last_modified = date.isoformat() except ValueError: # if not provided or not parsable as per the expected format, keep it None pass return OriginExistenceCheckInfo( origin_url=origin_url, exists=exists, last_modified=last_modified, content_length=content_length, ) def _check_origin_exists(url: str) -> OriginExistenceCheckInfo: """Ensure an URL exists, if not raise an explicit message.""" metadata = origin_exists(url) if not metadata["exists"]: raise BadInputExc(f"The provided url ({escape(url)}) does not exist!") return metadata def _get_visit_info_for_save_request( save_request: SaveOriginRequest, ) -> Tuple[Optional[datetime], Optional[str]]: """Retrieve visit information out of a save request Args: save_request: Input save origin request to retrieve information for. Returns: Tuple of (visit date, optional visit status) for such save request origin """ visit_date = None visit_status = None time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # stop trying to find a visit date one month after save request submission # as those requests to storage are expensive and associated loading task # surely ended up with errors if time_delta.days <= MAX_THRESHOLD_DAYS: try: origin_info = archive.lookup_origin(OriginInfo(url=save_request.origin_url)) origin_visits = get_origin_visits(origin_info) visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits] i = bisect_right(visit_dates, save_request.request_date) if i != len(visit_dates): visit_date = visit_dates[i] visit_status = origin_visits[i]["status"] except Exception as exc: sentry_sdk.capture_exception(exc) return visit_date, visit_status def _check_visit_update_status( save_request: SaveOriginRequest, ) -> Tuple[Optional[datetime], Optional[str], Optional[str]]: """Given a save request, determine whether a save request was successful or failed. Args: save_request: Input save origin request to retrieve information for. Returns: Tuple of (optional visit date, optional visit status, optional save task status) for such save request origin """ visit_date, visit_status = _get_visit_info_for_save_request(save_request) loading_task_status = None if visit_date and visit_status in ("full", "partial"): # visit has been performed, mark the saving task as succeeded loading_task_status = SAVE_TASK_SUCCEEDED elif visit_status in ("created", "ongoing"): # visit is currently running loading_task_status = SAVE_TASK_RUNNING elif visit_status in ("not_found", "failed"): loading_task_status = SAVE_TASK_FAILED else: time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # consider the task as failed if it is still in scheduled state # 30 days after its submission if time_delta.days > MAX_THRESHOLD_DAYS: loading_task_status = SAVE_TASK_FAILED return visit_date, visit_status, loading_task_status def _compute_task_loading_status( task: Optional[Dict[str, Any]] = None, task_run: Optional[Dict[str, Any]] = None, ) -> Optional[str]: loading_task_status: Optional[str] = None # First determine the loading task status out of task information if task: loading_task_status = _save_task_status[task["status"]] if task_run: loading_task_status = _save_task_run_status[task_run["status"]] return loading_task_status def _update_save_request_info( save_request: SaveOriginRequest, task: Optional[Dict[str, Any]] = None, task_run: Optional[Dict[str, Any]] = None, ) -> SaveOriginRequestInfo: """Update save request information out of the visit status and fallback to the task and task_run information if the visit status is missing. Args: save_request: Save request task: Associated scheduler task information about the save request task_run: Most recent run occurrence of the associated task Returns: Summary of the save request information updated. """ must_save = False # To determine the save code now request's final status, the visit date must be set # and the visit status must be a final one. Once they do, the save code now is # definitely done. if ( not save_request.visit_date or not save_request.visit_status or save_request.visit_status in NON_TERMINAL_STATUSES ): visit_date, visit_status, loading_task_status = _check_visit_update_status( save_request ) if not loading_task_status: # fallback when not provided loading_task_status = _compute_task_loading_status(task, task_run) if visit_date != save_request.visit_date: must_save = True save_request.visit_date = visit_date if visit_status != save_request.visit_status: must_save = True save_request.visit_status = visit_status if ( loading_task_status is not None and loading_task_status != save_request.loading_task_status ): must_save = True save_request.loading_task_status = loading_task_status if must_save: save_request.save() return save_request.to_dict() def create_save_origin_request( visit_type: str, origin_url: str, privileged_user: bool = False, user_id: Optional[int] = None, **kwargs, ) -> SaveOriginRequestInfo: """Create a loading task to save a software origin into the archive. This function aims to create a software origin loading task trough the use of the swh-scheduler component. First, some checks are performed to see if the visit type and origin url are valid but also if the the save request can be accepted. For the 'archives' visit type, this also ensures the artifacts actually exists. If those checks passed, the loading task is then created. Otherwise, the save request is put in pending or rejected state. All the submitted save requests are logged into the swh-web database to keep track of them. Args: visit_type: the type of visit to perform (e.g. git, hg, svn, archives, ...) origin_url: the url of the origin to save privileged: Whether the user has some more privilege than other (bypass review, access to privileged other visit types) user_id: User identifier (provided when authenticated) kwargs: Optional parameters (e.g. artifact_url, artifact_filename, artifact_version) Raises: BadInputExc: the visit type or origin url is invalid or inexistent ForbiddenExc: the provided origin url is blacklisted Returns: dict: A dict describing the save request with the following keys: * **visit_type**: the type of visit to perform * **origin_url**: the url of the origin * **save_request_date**: the date the request was submitted * **save_request_status**: the request status, either **accepted**, **rejected** or **pending** * **save_task_status**: the origin loading task status, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** """ visit_type_tasks = get_savable_visit_types_dict(privileged_user) _check_visit_type_savable(visit_type, privileged_user) _check_origin_url_valid(origin_url) # if all checks passed so far, we can try and save the origin save_request_status = can_save_origin(origin_url, privileged_user) task = None # if the origin save request is accepted, create a scheduler # task to load it into the archive if save_request_status == SAVE_REQUEST_ACCEPTED: # create a task with high priority task_kwargs: Dict[str, Any] = { "priority": "high", "url": origin_url, } if visit_type == "archives": # extra arguments for that type are required archives_data = kwargs.get("archives_data", []) if not archives_data: raise BadInputExc( "Artifacts data are missing for the archives visit type." ) artifacts = [] for artifact in archives_data: artifact_url = artifact.get("artifact_url") artifact_version = artifact.get("artifact_version") if not artifact_url or not artifact_version: raise BadInputExc("Missing url or version for an artifact to load.") metadata = _check_origin_exists(artifact_url) artifacts.append( { "url": artifact_url, "version": artifact_version, "time": metadata["last_modified"], "length": metadata["content_length"], } ) task_kwargs = dict(**task_kwargs, artifacts=artifacts, snapshot_append=True) sor = None # get list of previously submitted save requests (most recent first) current_sors = list( SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ).order_by("-request_date") ) can_create_task = False # if no save requests previously submitted, create the scheduler task if not current_sors: can_create_task = True else: # get the latest submitted save request sor = current_sors[0] # if it was in pending state, we need to create the scheduler task # and update the save request info in the database if sor.status == SAVE_REQUEST_PENDING: can_create_task = True # a task has already been created to load the origin elif sor.loading_task_id != -1: # get the scheduler task and its status tasks = scheduler().get_tasks([sor.loading_task_id]) task = tasks[0] if tasks else None task_runs = scheduler().get_task_runs([sor.loading_task_id]) task_run = task_runs[0] if task_runs else None save_request_info = _update_save_request_info(sor, task, task_run) task_status = save_request_info["save_task_status"] # create a new scheduler task only if the previous one has been # already executed if ( task_status == SAVE_TASK_FAILED or task_status == SAVE_TASK_SUCCEEDED ): can_create_task = True sor = None else: can_create_task = False if can_create_task: # effectively create the scheduler task task_dict = create_oneshot_task_dict( visit_type_tasks[visit_type], **task_kwargs ) task = scheduler().create_tasks([task_dict])[0] # pending save request has been accepted if sor: sor.status = SAVE_REQUEST_ACCEPTED sor.loading_task_id = task["id"] sor.save() else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, loading_task_id=task["id"], user_ids=f'"{user_id}"' if user_id else None, ) # save request must be manually reviewed for acceptation elif save_request_status == SAVE_REQUEST_PENDING: # check if there is already such a save request already submitted, # no need to add it to the database in that case try: sor = SaveOriginRequest.objects.get( visit_type=visit_type, origin_url=origin_url, status=save_request_status ) user_ids = sor.user_ids if sor.user_ids is not None else "" if user_id is not None and f'"{user_id}"' not in user_ids: # update user ids list sor.user_ids = f'{sor.user_ids},"{user_id}"' sor.save() # if not add it to the database except ObjectDoesNotExist: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, user_ids=f'"{user_id}"' if user_id else None, ) # origin can not be saved as its url is blacklisted, # log the request to the database anyway else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, user_ids=f'"{user_id}"' if user_id else None, ) if save_request_status == SAVE_REQUEST_REJECTED: raise ForbiddenExc( ( 'The "save code now" request has been rejected ' "because the provided origin url is blacklisted." ) ) assert sor is not None return _update_save_request_info(sor, task) def update_save_origin_requests_from_queryset( requests_queryset: QuerySet, ) -> List[SaveOriginRequestInfo]: """Update all save requests from a SaveOriginRequest queryset, update their status in db and return the list of impacted save_requests. Args: requests_queryset: input SaveOriginRequest queryset Returns: list: A list of save origin request info dicts as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ task_ids = [] for sor in requests_queryset: task_ids.append(sor.loading_task_id) save_requests = [] if task_ids: try: tasks = scheduler().get_tasks(task_ids) tasks = {task["id"]: task for task in tasks} task_runs = scheduler().get_task_runs(tasks) task_runs = {task_run["task"]: task_run for task_run in task_runs} except Exception: # allow to avoid mocking api GET responses for /origin/save endpoint when # running cypress tests as scheduler is not available tasks = {} task_runs = {} for sor in requests_queryset: sr_dict = _update_save_request_info( sor, tasks.get(sor.loading_task_id), task_runs.get(sor.loading_task_id), ) save_requests.append(sr_dict) return save_requests def refresh_save_origin_request_statuses() -> List[SaveOriginRequestInfo]: """Refresh non-terminal save origin requests (SOR) in the backend. Non-terminal SOR are requests whose status is **accepted** and their task status are either **created**, **not yet scheduled**, **scheduled** or **running**. This shall compute this list of SOR, checks their status in the scheduler and optionally elasticsearch for their current status. Then update those in db. Finally, this returns the refreshed information on those SOR. """ pivot_date = datetime.now(tz=timezone.utc) - timedelta(days=MAX_THRESHOLD_DAYS) save_requests = SaveOriginRequest.objects.filter( # Retrieve accepted request statuses (all statuses) Q(status=SAVE_REQUEST_ACCEPTED), # those without the required information we need to update Q(visit_date__isnull=True) | Q(visit_status__isnull=True) | Q(visit_status__in=NON_TERMINAL_STATUSES), # limit results to recent ones (that is roughly 30 days old at best) Q(request_date__gte=pivot_date), ) return ( update_save_origin_requests_from_queryset(save_requests) if save_requests.count() > 0 else [] ) def get_save_origin_requests( visit_type: str, origin_url: str ) -> List[SaveOriginRequestInfo]: """ Get all save requests for a given software origin. Args: visit_type: the type of visit origin_url: the url of the origin Raises: BadInputExc: the visit type or origin url is invalid swh.web.common.exc.NotFoundExc: no save requests can be found for the given origin Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) sors = SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ) if sors.count() == 0: raise NotFoundExc( f"No save requests found for visit of type {visit_type} " f"on origin with url {origin_url}." ) return update_save_origin_requests_from_queryset(sors) def get_save_origin_task_info( save_request_id: int, full_info: bool = True ) -> Dict[str, Any]: """ Get detailed information about an accepted save origin request and its associated loading task. If the associated loading task info is archived and removed from the scheduler database, returns an empty dictionary. Args: save_request_id: identifier of a save origin request full_info: whether to return detailed info for staff users Returns: A dictionary with the following keys: - **type**: loading task type - **arguments**: loading task arguments - **id**: loading task database identifier - **backend_id**: loading task celery identifier - **scheduled**: loading task scheduling date - **ended**: loading task termination date - **status**: loading task execution status - **visit_status**: Actual visit status Depending on the availability of the task logs in the elasticsearch cluster of Software Heritage, the returned dictionary may also contain the following keys: - **name**: associated celery task name - **message**: relevant log message from task execution - **duration**: task execution time (only if it succeeded) - **worker**: name of the worker that executed the task """ try: save_request = SaveOriginRequest.objects.get(id=save_request_id) except ObjectDoesNotExist: return {} - task = scheduler().get_tasks([save_request.loading_task_id]) + task_info: Dict[str, Any] = {} + if save_request.note is not None: + task_info["note"] = save_request.note + + try: + task = scheduler().get_tasks([save_request.loading_task_id]) + except Exception: + # to avoid mocking GET responses of /save/task/info/ endpoint when running + # cypress tests as scheduler is not available in that case + task = None + task = task[0] if task else None if task is None: - return {} + return task_info task_run = scheduler().get_task_runs([task["id"]]) task_run = task_run[0] if task_run else None if task_run is None: - return {} - task_run["type"] = task["type"] - task_run["arguments"] = task["arguments"] - task_run["id"] = task_run["task"] - del task_run["task"] - del task_run["metadata"] - # Enrich the task run with the loading visit status - task_run["visit_status"] = save_request.visit_status + return task_info + task_info.update(task_run) + task_info["type"] = task["type"] + task_info["arguments"] = task["arguments"] + task_info["id"] = task_run["task"] + del task_info["task"] + del task_info["metadata"] + # Enrich the task info with the loading visit status + task_info["visit_status"] = save_request.visit_status es_workers_index_url = get_config()["es_workers_index_url"] if not es_workers_index_url: - return task_run + return task_info es_workers_index_url += "/_search" if save_request.visit_date: min_ts = save_request.visit_date max_ts = min_ts + timedelta(days=7) else: min_ts = save_request.request_date max_ts = min_ts + timedelta(days=MAX_THRESHOLD_DAYS) min_ts_unix = int(min_ts.timestamp()) * 1000 max_ts_unix = int(max_ts.timestamp()) * 1000 save_task_status = _save_task_status[task["status"]] priority = "3" if save_task_status == SAVE_TASK_FAILED else "6" query = { "bool": { "must": [ {"match_phrase": {"priority": {"query": priority}}}, {"match_phrase": {"swh_task_id": {"query": task_run["backend_id"]}}}, { "range": { "@timestamp": { "gte": min_ts_unix, "lte": max_ts_unix, "format": "epoch_millis", } } }, ] } } try: response = requests.post( es_workers_index_url, json={"query": query, "sort": ["@timestamp"]}, timeout=30, ) results = json.loads(response.text) if results["hits"]["total"]["value"] >= 1: task_run_info = results["hits"]["hits"][-1]["_source"] if "swh_logging_args_runtime" in task_run_info: duration = task_run_info["swh_logging_args_runtime"] - task_run["duration"] = duration + task_info["duration"] = duration if "message" in task_run_info: - task_run["message"] = task_run_info["message"] + task_info["message"] = task_run_info["message"] if "swh_logging_args_name" in task_run_info: - task_run["name"] = task_run_info["swh_logging_args_name"] + task_info["name"] = task_run_info["swh_logging_args_name"] elif "swh_task_name" in task_run_info: - task_run["name"] = task_run_info["swh_task_name"] + task_info["name"] = task_run_info["swh_task_name"] if "hostname" in task_run_info: - task_run["worker"] = task_run_info["hostname"] + task_info["worker"] = task_run_info["hostname"] elif "host" in task_run_info: - task_run["worker"] = task_run_info["host"] + task_info["worker"] = task_run_info["host"] except Exception as exc: logger.warning("Request to Elasticsearch failed\n%s", exc) sentry_sdk.capture_exception(exc) if not full_info: for field in ("id", "backend_id", "worker"): # remove some staff only fields - task_run.pop(field, None) + task_info.pop(field, None) if "message" in task_run and "Loading failure" in task_run["message"]: # hide traceback for non staff users, only display exception - message_lines = task_run["message"].split("\n") + message_lines = task_info["message"].split("\n") message = "" for line in message_lines: if line.startswith("Traceback"): break message += f"{line}\n" message += message_lines[-1] - task_run["message"] = message + task_info["message"] = message - return task_run + return task_info SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests" _submitted_save_requests_gauge = Gauge( name=SUBMITTED_SAVE_REQUESTS_METRIC, documentation="Number of submitted origin save requests", labelnames=["status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests" _accepted_save_requests_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_METRIC, documentation="Number of accepted origin save requests", labelnames=["load_task_status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) # Metric on the delay of save code now request per status and visit_type. This is the # time difference between the save code now is requested and the time it got ingested. ACCEPTED_SAVE_REQUESTS_DELAY_METRIC = "swh_web_save_requests_delay_seconds" _accepted_save_requests_delay_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_DELAY_METRIC, documentation="Save Requests Duration", labelnames=["load_task_status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) def compute_save_requests_metrics() -> None: """Compute Prometheus metrics related to origin save requests: - Number of submitted origin save requests - Number of accepted origin save requests - Save Code Now requests delay between request time and actual time of ingestion """ request_statuses = ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, ) load_task_statuses = ( SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, ) # for metrics, we want access to all visit types visit_types = get_savable_visit_types(privileged_user=True) labels_set = product(request_statuses, visit_types) for labels in labels_set: _submitted_save_requests_gauge.labels(*labels).set(0) labels_set = product(load_task_statuses, visit_types) for labels in labels_set: _accepted_save_requests_gauge.labels(*labels).set(0) duration_load_task_statuses = ( SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED, ) for labels in product(duration_load_task_statuses, visit_types): _accepted_save_requests_delay_gauge.labels(*labels).set(0) for sor in SaveOriginRequest.objects.all(): if sor.status == SAVE_REQUEST_ACCEPTED: _accepted_save_requests_gauge.labels( load_task_status=sor.loading_task_status, visit_type=sor.visit_type, ).inc() _submitted_save_requests_gauge.labels( status=sor.status, visit_type=sor.visit_type ).inc() if ( sor.loading_task_status in (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED) and sor.visit_date is not None and sor.request_date is not None ): delay = sor.visit_date.timestamp() - sor.request_date.timestamp() _accepted_save_requests_delay_gauge.labels( load_task_status=sor.loading_task_status, visit_type=sor.visit_type, ).inc(delay) diff --git a/swh/web/templates/admin/origin-save.html b/swh/web/templates/admin/origin-save.html index f4e210b6..832d5470 100644 --- a/swh/web/templates/admin/origin-save.html +++ b/swh/web/templates/admin/origin-save.html @@ -1,185 +1,186 @@ {% extends "layout.html" %} {% comment %} Copyright (C) 2018-2021 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load swh_templatetags %} {% load render_bundle from webpack_loader %} {% block header %} {{ block.super }} {% render_bundle 'admin' %} {% render_bundle 'save' %} {% endblock %} {% block title %} Save origin administration {% endblock %} {% block navbar-content %}

Save origin administration

{% endblock %} {% block content %}
Date Type Url
Date Type Url Status Info
+
Date Type UrlInfo
Url
Url
{% endblock %}