diff --git a/Makefile.local b/Makefile.local index 118f57b1..9c1a51ea 100644 --- a/Makefile.local +++ b/Makefile.local @@ -1,96 +1,97 @@ TEST_DIRS := ./swh/web/tests TESTFLAGS = --hypothesis-profile=swh-web-fast TESTFULL_FLAGS = --hypothesis-profile=swh-web YARN ?= yarn yarn-install: package.json $(YARN) install .PHONY: build-webpack-dev build-webpack-dev: yarn-install $(YARN) build-dev .PHONY: build-webpack-test build-webpack-test: yarn-install $(YARN) build-test .PHONY: build-webpack-dev-no-verbose build-webpack-dev-no-verbose: yarn-install $(YARN) build-dev >/dev/null .PHONY: build-webpack-prod build-webpack-prod: yarn-install $(YARN) build .PHONY: run-migrations run-migrations: python3 swh/web/manage.py migrate --settings=swh.web.settings.development -v0 2>/dev/null .PHONY: run-migrations-prod run-migrations-prod: django-admin migrate --settings=swh.web.settings.production -v0 2>/dev/null .PHONY: run-migrations-test run-migrations-test: rm -f swh/web/settings/testdb.sqlite3 django-admin migrate --settings=swh.web.settings.tests -v0 2>/dev/null cat swh/web/tests/create_test_admin.py | django-admin shell --settings=swh.web.settings.tests + cat swh/web/tests/create_test_users.py | django-admin shell --settings=swh.web.settings.tests .PHONY: clear-memcached clear-memcached: echo "flush_all" | nc -q 2 localhost 11211 2>/dev/null run-django-webpack-devserver: run-migrations yarn-install bash -c "trap 'trap - SIGINT SIGTERM ERR; kill %1' SIGINT SIGTERM ERR; $(YARN) start-dev & sleep 10 && cd swh/web && python3 manage.py runserver --nostatic --settings=swh.web.settings.development" run-django-webpack-dev: build-webpack-dev run-migrations python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.development run-django-webpack-prod: build-webpack-prod run-migrations-prod clear-memcached python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-django-server-dev: run-migrations python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.development run-django-server-prod: run-migrations-prod clear-memcached python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.production run-gunicorn-server: run-migrations-prod clear-memcached DJANGO_SETTINGS_MODULE=swh.web.settings.production \ gunicorn --bind 127.0.0.1:5004 \ --threads 2 \ --workers 2 'django.core.wsgi:get_wsgi_application()' run-django-webpack-memory-storages: build-webpack-dev run-migrations python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.tests test-full: $(TEST) $(TESTFULL_FLAGS) $(TEST_DIRS) .PHONY: test-frontend-cmd test-frontend-cmd: build-webpack-test run-migrations-test python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.tests & sleep 10 && $(YARN) run cypress run --config numTestsKeptInMemory=0 ; pkill -P $$! ; $(YARN) run mochawesome test-frontend: export CYPRESS_SKIP_SLOW_TESTS=1 test-frontend: test-frontend-cmd test-frontend-full: export CYPRESS_SKIP_SLOW_TESTS=0 test-frontend-full: test-frontend-cmd .PHONY: test-frontend-ui-cmd test-frontend-ui-cmd: build-webpack-test run-migrations-test bash -c "trap 'trap - SIGINT SIGTERM ERR EXIT; jobs -p | head -1 | xargs pkill -P' SIGINT SIGTERM ERR EXIT; python3 swh/web/manage.py runserver --nostatic --settings=swh.web.settings.tests & sleep 10 && $(YARN) run cypress open" test-frontend-ui: export CYPRESS_SKIP_SLOW_TESTS=1 test-frontend-ui: test-frontend-ui-cmd test-frontend-full-ui: export CYPRESS_SKIP_SLOW_TESTS=0 test-frontend-full-ui: test-frontend-ui-cmd # Override default rule to make sure DJANGO env var is properly set. It # *should* work without any override thanks to the mypy django-stubs plugin, # but it currently doesn't; see # https://github.com/typeddjango/django-stubs/issues/166 check-mypy: DJANGO_SETTINGS_MODULE=swh.web.settings.development $(MYPY) $(MYPYFLAGS) swh diff --git a/assets/src/bundles/save/index.js b/assets/src/bundles/save/index.js index 7c3b934d..61ce06fd 100644 --- a/assets/src/bundles/save/index.js +++ b/assets/src/bundles/save/index.js @@ -1,450 +1,481 @@ /** * Copyright (C) 2018-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import {csrfPost, handleFetchError, isGitRepoUrl, htmlAlert, removeUrlFragment} from 'utils/functions'; import {swhSpinnerSrc} from 'utils/constants'; let saveRequestsTable; function originSaveRequest(originType, originUrl, acceptedCallback, pendingCallback, errorCallback) { let addSaveOriginRequestUrl = Urls.api_1_save_origin(originType, originUrl); $('.swh-processing-save-request').css('display', 'block'); csrfPost(addSaveOriginRequestUrl) .then(handleFetchError) .then(response => response.json()) .then(data => { $('.swh-processing-save-request').css('display', 'none'); if (data.save_request_status === 'accepted') { acceptedCallback(); } else { pendingCallback(); } }) .catch(response => { $('.swh-processing-save-request').css('display', 'none'); response.json().then(errorData => { errorCallback(response.status, errorData); }); }); } +const userRequestsFilterCheckbox = ` +
+ + +
+`; + export function initOriginSave() { $(document).ready(() => { $.fn.dataTable.ext.errMode = 'none'; fetch(Urls.origin_save_types_list()) .then(response => response.json()) .then(data => { for (let originType of data) { $('#swh-input-visit-type').append(``); } }); saveRequestsTable = $('#swh-origin-save-requests') .on('error.dt', (e, settings, techNote, message) => { $('#swh-origin-save-request-list-error').text('An error occurred while retrieving the save requests list'); console.log(message); }) .DataTable({ serverSide: true, processing: true, language: { processing: `` }, - ajax: Urls.origin_save_requests_list('all'), + ajax: { + url: Urls.origin_save_requests_list('all'), + data: (d) => { + if (swh.webapp.isUserLoggedIn() && $('#swh-save-requests-user-filter').prop('checked')) { + d.user_requests_only = '1'; + } + } + }, searchDelay: 1000, + // see https://datatables.net/examples/advanced_init/dom_toolbar.html and the comments section + // this option customizes datatables UI components by adding an extra checkbox above the table + // while keeping bootstrap layout + dom: '<"row"<"col-sm-3"l><"col-sm-6 text-left user-requests-filter"><"col-sm-3"f>>' + + '<"row"<"col-sm-12"tr>>' + + '<"row"<"col-sm-5"i><"col-sm-7"p>>', + fnInitComplete: function() { + if (swh.webapp.isUserLoggedIn()) { + $('div.user-requests-filter').html(userRequestsFilterCheckbox); + $('#swh-save-requests-user-filter').on('change', () => { + saveRequestsTable.draw(); + }); + } + }, columns: [ { data: 'save_request_date', name: 'request_date', render: (data, type, row) => { if (type === 'display') { let date = new Date(data); return date.toLocaleString(); } return data; } }, { data: 'visit_type', name: 'visit_type' }, { data: 'origin_url', name: 'origin_url', render: (data, type, row) => { if (type === 'display') { let html = ''; const sanitizedURL = $.fn.dataTable.render.text().display(data); if (row.save_task_status === 'succeeded') { let browseOriginUrl = `${Urls.browse_origin()}?origin_url=${encodeURIComponent(sanitizedURL)}`; if (row.visit_date) { browseOriginUrl += `&timestamp=${encodeURIComponent(row.visit_date)}`; } html += `${sanitizedURL}`; } else { html += sanitizedURL; } html += ` `; return html; } return data; } }, { data: 'save_request_status', name: 'status' }, { data: 'save_task_status', name: 'loading_task_status' }, { name: 'info', render: (data, type, row) => { if (row.save_task_status === 'succeeded' || row.save_task_status === 'failed') { return ``; } else { return ''; } } }, { render: (data, type, row) => { if (row.save_request_status === 'accepted') { const saveAgainButton = ''; return saveAgainButton; } else { return ''; } } } ], scrollY: '50vh', scrollCollapse: true, order: [[0, 'desc']], responsive: { details: { type: 'none' } } }); swh.webapp.addJumpToPagePopoverToDataTable(saveRequestsTable); $('#swh-origin-save-requests-list-tab').on('shown.bs.tab', () => { saveRequestsTable.draw(); window.location.hash = '#requests'; }); $('#swh-origin-save-request-help-tab').on('shown.bs.tab', () => { removeUrlFragment(); $('.swh-save-request-info').popover('dispose'); }); let saveRequestAcceptedAlert = htmlAlert( 'success', 'The "save code now" request has been accepted and will be processed as soon as possible.', true ); let saveRequestPendingAlert = htmlAlert( 'warning', 'The "save code now" request has been put in pending state and may be accepted for processing after manual review.', true ); let saveRequestRateLimitedAlert = htmlAlert( 'danger', 'The rate limit for "save code now" requests has been reached. Please try again later.', true ); let saveRequestUnknownErrorAlert = htmlAlert( 'danger', 'An unexpected error happened when submitting the "save code now request".', true ); $('#swh-save-origin-form').submit(event => { event.preventDefault(); event.stopPropagation(); $('.alert').alert('close'); if (event.target.checkValidity()) { $(event.target).removeClass('was-validated'); let originType = $('#swh-input-visit-type').val(); let originUrl = $('#swh-input-origin-url').val(); originSaveRequest(originType, originUrl, () => $('#swh-origin-save-request-status').html(saveRequestAcceptedAlert), () => $('#swh-origin-save-request-status').html(saveRequestPendingAlert), (statusCode, errorData) => { $('#swh-origin-save-request-status').css('color', 'red'); if (statusCode === 403) { const errorAlert = htmlAlert('danger', `Error: ${errorData['reason']}`); $('#swh-origin-save-request-status').html(errorAlert); } else if (statusCode === 429) { $('#swh-origin-save-request-status').html(saveRequestRateLimitedAlert); } else if (statusCode === 400) { const errorAlert = htmlAlert('danger', errorData['reason']); $('#swh-origin-save-request-status').html(errorAlert); } else { $('#swh-origin-save-request-status').html(saveRequestUnknownErrorAlert); } }); } else { $(event.target).addClass('was-validated'); } }); $('#swh-show-origin-save-requests-list').on('click', (event) => { event.preventDefault(); $('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show'); }); $('#swh-input-origin-url').on('input', function(event) { let originUrl = $(this).val().trim(); $(this).val(originUrl); $('#swh-input-visit-type option').each(function() { let val = $(this).val(); if (val && originUrl.includes(val)) { $(this).prop('selected', true); } }); }); if (window.location.hash === '#requests') { $('.nav-tabs a[href="#swh-origin-save-requests-list"]').tab('show'); } }); } export function validateSaveOriginUrl(input) { let originType = $('#swh-input-visit-type').val(); let originUrl = null; let validUrl = true; try { originUrl = new URL(input.value.trim()); } catch (TypeError) { validUrl = false; } if (validUrl) { let allowedProtocols = ['http:', 'https:', 'svn:', 'git:']; validUrl = ( allowedProtocols.find(protocol => protocol === originUrl.protocol) !== undefined ); } if (validUrl && originType === 'git') { // additional checks for well known code hosting providers switch (originUrl.hostname) { case 'github.com': validUrl = isGitRepoUrl(originUrl); break; case 'git.code.sf.net': validUrl = isGitRepoUrl(originUrl, '/p/'); break; case 'bitbucket.org': validUrl = isGitRepoUrl(originUrl); break; default: if (originUrl.hostname.startsWith('gitlab.')) { validUrl = isGitRepoUrl(originUrl); } break; } } if (validUrl) { input.setCustomValidity(''); } else { input.setCustomValidity('The origin url is not valid or does not reference a code repository'); } } export function initTakeNewSnapshot() { let newSnapshotRequestAcceptedAlert = htmlAlert( 'success', 'The "take new snapshot" request has been accepted and will be processed as soon as possible.', true ); let newSnapshotRequestPendingAlert = htmlAlert( 'warning', 'The "take new snapshot" request has been put in pending state and may be accepted for processing after manual review.', true ); let newSnapshotRequestRateLimitAlert = htmlAlert( 'danger', 'The rate limit for "take new snapshot" requests has been reached. Please try again later.', true ); let newSnapshotRequestUnknownErrorAlert = htmlAlert( 'danger', 'An unexpected error happened when submitting the "save code now request".', true ); $(document).ready(() => { $('#swh-take-new-snapshot-form').submit(event => { event.preventDefault(); event.stopPropagation(); let originType = $('#swh-input-visit-type').val(); let originUrl = $('#swh-input-origin-url').val(); originSaveRequest(originType, originUrl, () => $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestAcceptedAlert), () => $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestPendingAlert), (statusCode, errorData) => { $('#swh-take-new-snapshot-request-status').css('color', 'red'); if (statusCode === 403) { const errorAlert = htmlAlert('danger', `Error: ${errorData['detail']}`, true); $('#swh-take-new-snapshot-request-status').html(errorAlert); } else if (statusCode === 429) { $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestRateLimitAlert); } else { $('#swh-take-new-snapshot-request-status').html(newSnapshotRequestUnknownErrorAlert); } }); }); }); } export function formatValuePerType(type, value) { // Given some typed value, format and return accordingly formatted value const mapFormatPerTypeFn = { 'json': (v) => JSON.stringify(v, null, 2), 'date': (v) => new Date(v).toLocaleString(), 'raw': (v) => v, 'duration': (v) => v + ' seconds' }; return value === null ? null : mapFormatPerTypeFn[type](value); } export function displaySaveRequestInfo(event, saveRequestId) { event.stopPropagation(); const saveRequestTaskInfoUrl = Urls.origin_save_task_info(saveRequestId); // close popover when clicking again on the info icon if ($(event.target).data('bs.popover')) { $(event.target).popover('dispose'); return; } $('.swh-save-request-info').popover('dispose'); $(event.target).popover({ animation: false, boundary: 'viewport', container: 'body', title: 'Save request task information ' + '`, content: `

Fetching task information ...

`, html: true, placement: 'left', sanitizeFn: swh.webapp.filterXSS }); $(event.target).on('shown.bs.popover', function() { const popoverId = $(this).attr('aria-describedby'); $(`#${popoverId} .mdi-close`).click(() => { $(this).popover('dispose'); }); }); $(event.target).popover('show'); fetch(saveRequestTaskInfoUrl) .then(response => response.json()) .then(saveRequestTaskInfo => { let content; if ($.isEmptyObject(saveRequestTaskInfo)) { content = 'Not available'; } else { let saveRequestInfo = []; const taskData = { 'Type': ['raw', 'type'], 'Visit status': ['raw', 'visit_status'], 'Arguments': ['json', 'arguments'], 'Id': ['raw', 'id'], 'Backend id': ['raw', 'backend_id'], 'Scheduling date': ['date', 'scheduled'], 'Start date': ['date', 'started'], 'Completion date': ['date', 'ended'], 'Duration': ['duration', 'duration'], 'Runner': ['raw', 'worker'], 'Log': ['raw', 'message'] }; for (const [title, [type, property]] of Object.entries(taskData)) { if (saveRequestTaskInfo.hasOwnProperty(property)) { saveRequestInfo.push({ key: title, value: formatValuePerType(type, saveRequestTaskInfo[property]) }); } } content = ''; for (let info of saveRequestInfo) { content += ``; } content += '
'; } $('.swh-popover').html(content); $(event.target).popover('update'); }); } export function fillSaveRequestFormAndScroll(visitType, originUrl) { $('#swh-input-origin-url').val(originUrl); let originTypeFound = false; $('#swh-input-visit-type option').each(function() { let val = $(this).val(); if (val && originUrl.includes(val)) { $(this).prop('selected', true); originTypeFound = true; } }); if (!originTypeFound) { $('#swh-input-visit-type option').each(function() { let val = $(this).val(); if (val === visitType) { $(this).prop('selected', true); } }); } window.scrollTo(0, 0); } diff --git a/cypress/integration/origin-save.spec.js b/cypress/integration/origin-save.spec.js index a04bed10..4e2abf8e 100644 --- a/cypress/integration/origin-save.spec.js +++ b/cypress/integration/origin-save.spec.js @@ -1,418 +1,475 @@ /** * Copyright (C) 2019-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ let url; let origin; const $ = Cypress.$; const saveCodeMsg = { 'success': 'The "save code now" request has been accepted and will be processed as soon as possible.', 'warning': 'The "save code now" request has been put in pending state and may be accepted for processing after manual review.', 'rejected': 'The "save code now" request has been rejected because the provided origin url is blacklisted.', 'rateLimit': 'The rate limit for "save code now" requests has been reached. Please try again later.', 'not-found': 'The provided url does not exist', 'unknownError': 'An unexpected error happened when submitting the "save code now request', 'csrfError': 'CSRF Failed: Referrer checking failed - no Referrer.' }; function makeOriginSaveRequest(originType, originUrl) { cy.get('#swh-input-origin-url') .type(originUrl) .get('#swh-input-visit-type') .select(originType) .get('#swh-save-origin-form') .submit(); } function checkAlertVisible(alertType, msg) { cy.get('#swh-origin-save-request-status') .should('be.visible') .find(`.alert-${alertType}`) .should('be.visible') .and('contain', msg); } // Stub requests to save an origin function stubSaveRequest({ requestUrl, visitType = 'git', saveRequestStatus, originUrl, saveTaskStatus, responseStatus = 200, // For error code with the error message in the 'reason' key response errorMessage = '', saveRequestDate = new Date(), visitDate = new Date(), visitStatus = null } = {}) { let response; if (responseStatus !== 200 && errorMessage) { response = { 'reason': errorMessage }; } else { response = genOriginSaveResponse({visitType: visitType, saveRequestStatus: saveRequestStatus, originUrl: originUrl, saveRequestDate: saveRequestDate, saveTaskStatus: saveTaskStatus, visitDate: visitDate, visitStatus: visitStatus }); } cy.intercept('POST', requestUrl, {body: response, statusCode: responseStatus}) .as('saveRequest'); } // Mocks API response : /save/(:visit_type)/(:origin_url) // visit_type : {'git', 'hg', 'svn', ...} function genOriginSaveResponse({ visitType = 'git', saveRequestStatus, originUrl, saveRequestDate = new Date(), saveTaskStatus, visitDate = new Date(), visitStatus } = {}) { return { 'visit_type': visitType, 'save_request_status': saveRequestStatus, 'origin_url': originUrl, 'id': 1, 'save_request_date': saveRequestDate ? saveRequestDate.toISOString() : null, 'save_task_status': saveTaskStatus, 'visit_date': visitDate ? visitDate.toISOString() : null, 'visit_status': visitStatus }; }; describe('Origin Save Tests', function() { before(function() { url = this.Urls.origin_save(); origin = this.origin[0]; this.originSaveUrl = this.Urls.api_1_save_origin(origin.type, origin.url); }); beforeEach(function() { cy.fixture('origin-save').as('originSaveJSON'); cy.fixture('save-task-info').as('saveTaskInfoJSON'); cy.visit(url); }); it('should format appropriately values depending on their type', function() { let inputValues = [ // null values stay null {type: 'json', value: null, expectedValue: null}, {type: 'date', value: null, expectedValue: null}, {type: 'raw', value: null, expectedValue: null}, {type: 'duration', value: null, expectedValue: null}, // non null values formatted depending on their type {type: 'json', value: '{}', expectedValue: '"{}"'}, {type: 'date', value: '04/04/2021 01:00:00', expectedValue: '4/4/2021, 1:00:00 AM'}, {type: 'raw', value: 'value-for-identity', expectedValue: 'value-for-identity'}, {type: 'duration', value: '10', expectedValue: '10 seconds'}, {type: 'duration', value: 100, expectedValue: '100 seconds'} ]; cy.window().then(win => { inputValues.forEach(function(input, index, array) { let actualValue = win.swh.save.formatValuePerType(input.type, input.value); assert.equal(actualValue, input.expectedValue); }); }); }); it('should display accepted message when accepted', function() { stubSaveRequest({requestUrl: this.originSaveUrl, saveRequestStatus: 'accepted', originUrl: origin.url, saveTaskStatus: 'not yet scheduled'}); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('success', saveCodeMsg['success']); }); }); it('should validate gitlab subproject url', function() { const gitlabSubProjectUrl = 'https://gitlab.com/user/project/sub/'; const originSaveUrl = this.Urls.api_1_save_origin('git', gitlabSubProjectUrl); stubSaveRequest({requestUrl: originSaveUrl, saveRequestStatus: 'accepted', originurl: gitlabSubProjectUrl, saveTaskStatus: 'not yet scheduled'}); makeOriginSaveRequest('git', gitlabSubProjectUrl); cy.wait('@saveRequest').then(() => { checkAlertVisible('success', saveCodeMsg['success']); }); }); it('should validate project url with _ in username', function() { const gitlabSubProjectUrl = 'https://gitlab.com/user_name/project.git'; const originSaveUrl = this.Urls.api_1_save_origin('git', gitlabSubProjectUrl); stubSaveRequest({requestUrl: originSaveUrl, saveRequestStatus: 'accepted', originurl: gitlabSubProjectUrl, saveTaskStatus: 'not yet scheduled'}); makeOriginSaveRequest('git', gitlabSubProjectUrl); cy.wait('@saveRequest').then(() => { checkAlertVisible('success', saveCodeMsg['success']); }); }); it('should display warning message when pending', function() { stubSaveRequest({requestUrl: this.originSaveUrl, saveRequestStatus: 'pending', originUrl: origin.url, saveTaskStatus: 'not created'}); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('warning', saveCodeMsg['warning']); }); }); it('should show error when the origin does not exist (status: 400)', function() { stubSaveRequest({requestUrl: this.originSaveUrl, originUrl: origin.url, responseStatus: 400, errorMessage: saveCodeMsg['not-found']}); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['not-found']); }); }); it('should show error when csrf validation failed (status: 403)', function() { stubSaveRequest({requestUrl: this.originSaveUrl, saveRequestStatus: 'rejected', originUrl: origin.url, saveTaskStatus: 'not created', responseStatus: 403, errorMessage: saveCodeMsg['csrfError']}); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['csrfError']); }); }); it('should show error when origin is rejected (status: 403)', function() { stubSaveRequest({requestUrl: this.originSaveUrl, saveRequestStatus: 'rejected', originUrl: origin.url, saveTaskStatus: 'not created', responseStatus: 403, errorMessage: saveCodeMsg['rejected']}); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['rejected']); }); }); it('should show error when rate limited (status: 429)', function() { stubSaveRequest({requestUrl: this.originSaveUrl, saveRequestStatus: 'Request was throttled. Expected available in 60 seconds.', originUrl: origin.url, saveTaskStatus: 'not created', responseStatus: 429}); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['rateLimit']); }); }); it('should show error when unknown error occurs (status other than 200, 403, 429)', function() { stubSaveRequest({requestUrl: this.originSaveUrl, saveRequestStatus: 'Error', originUrl: origin.url, saveTaskStatus: 'not created', responseStatus: 406}); makeOriginSaveRequest(origin.type, origin.url); cy.wait('@saveRequest').then(() => { checkAlertVisible('danger', saveCodeMsg['unknownError']); }); }); it('should display origin save info in the requests table', function() { cy.intercept('/save/requests/list/**', {fixture: 'origin-save'}); cy.get('#swh-origin-save-requests-list-tab').click(); cy.get('tbody tr').then(rows => { let i = 0; for (let row of rows) { const cells = row.cells; const requestDateStr = new Date(this.originSaveJSON.data[i].save_request_date).toLocaleString(); const saveStatus = this.originSaveJSON.data[i].save_task_status; assert.equal($(cells[0]).text(), requestDateStr); assert.equal($(cells[1]).text(), this.originSaveJSON.data[i].visit_type); let html = ''; if (saveStatus === 'succeeded') { let browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${encodeURIComponent(this.originSaveJSON.data[i].origin_url)}`; browseOriginUrl += `&timestamp=${encodeURIComponent(this.originSaveJSON.data[i].visit_date)}`; html += `${this.originSaveJSON.data[i].origin_url}`; } else { html += this.originSaveJSON.data[i].origin_url; } html += ` `; html += ''; assert.equal($(cells[2]).html(), html); assert.equal($(cells[3]).text(), this.originSaveJSON.data[i].save_request_status); assert.equal($(cells[4]).text(), saveStatus); ++i; } }); }); it('should not add timestamp to the browse origin URL is no visit date has been found', function() { const originUrl = 'https://git.example.org/example.git'; const saveRequestData = genOriginSaveResponse({ saveRequestStatus: 'accepted', originUrl: originUrl, saveTaskStatus: 'succeeded', visitDate: null, visitStatus: 'full' }); const saveRequestsListData = { 'recordsTotal': 1, 'draw': 2, 'recordsFiltered': 1, 'data': [saveRequestData] }; cy.intercept('/save/requests/list/**', {body: saveRequestsListData}) .as('saveRequestsList'); cy.get('#swh-origin-save-requests-list-tab').click(); cy.wait('@saveRequestsList'); cy.get('tbody tr').then(rows => { const firstRowCells = rows[0].cells; const browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${encodeURIComponent(originUrl)}`; const browseOriginLink = `${originUrl}`; expect($(firstRowCells[2]).html()).to.have.string(browseOriginLink); }); }); it('should display/close task info popover when clicking on the info button', function() { cy.intercept('/save/requests/list/**', {fixture: 'origin-save'}); cy.intercept('/save/task/info/**', {fixture: 'save-task-info'}); cy.get('#swh-origin-save-requests-list-tab').click(); cy.get('.swh-save-request-info') .eq(0) .click(); cy.get('.swh-save-request-info-popover') .should('be.visible'); cy.get('.swh-save-request-info') .eq(0) .click(); cy.get('.swh-save-request-info-popover') .should('not.exist'); }); it('should hide task info popover when clicking on the close button', function() { cy.intercept('/save/requests/list/**', {fixture: 'origin-save'}); cy.intercept('/save/task/info/**', {fixture: 'save-task-info'}); cy.get('#swh-origin-save-requests-list-tab').click(); cy.get('.swh-save-request-info') .eq(0) .click(); cy.get('.swh-save-request-info-popover') .should('be.visible'); cy.get('.swh-save-request-info-close') .click(); cy.get('.swh-save-request-info-popover') .should('not.exist'); }); it('should fill save request form when clicking on "Save again" button', function() { cy.intercept('/save/requests/list/**', {fixture: 'origin-save'}); cy.get('#swh-origin-save-requests-list-tab').click(); cy.get('.swh-save-origin-again') .eq(0) .click(); cy.get('tbody tr').eq(0).then(row => { const cells = row[0].cells; cy.get('#swh-input-visit-type') .should('have.value', $(cells[1]).text()); cy.get('#swh-input-origin-url') .should('have.value', $(cells[2]).text().slice(0, -1)); }); }); it('should select correct visit type if possible when clicking on "Save again" button', function() { const originUrl = 'https://gitlab.inria.fr/solverstack/maphys/maphys/'; const badVisitType = 'hg'; const goodVisitType = 'git'; cy.intercept('/save/requests/list/**', {fixture: 'origin-save'}); stubSaveRequest({requestUrl: this.Urls.api_1_save_origin(badVisitType, originUrl), visitType: badVisitType, saveRequestStatus: 'accepted', originUrl: originUrl, saveTaskStatus: 'failed', visitStatus: 'failed', responseStatus: 200, errorMessage: saveCodeMsg['accepted']}); makeOriginSaveRequest(badVisitType, originUrl); cy.get('#swh-origin-save-requests-list-tab').click(); cy.wait('@saveRequest').then(() => { cy.get('.swh-save-origin-again') .eq(0) .click(); cy.get('tbody tr').eq(0).then(row => { const cells = row[0].cells; cy.get('#swh-input-visit-type') .should('have.value', goodVisitType); cy.get('#swh-input-origin-url') .should('have.value', $(cells[2]).text().slice(0, -1)); }); }); }); it('should create save request for authenticated user', function() { - cy.adminLogin(); + cy.userLogin(); cy.visit(url); const originUrl = 'https://git.example.org/account/repo'; stubSaveRequest({requestUrl: this.Urls.api_1_save_origin('git', originUrl), saveRequestStatus: 'accepted', originUrl: origin.url, saveTaskStatus: 'not yet scheduled'}); makeOriginSaveRequest('git', originUrl); cy.wait('@saveRequest').then(() => { checkAlertVisible('success', saveCodeMsg['success']); }); }); + it('should not show user requests filter checkbox for anonymous users', function() { + cy.get('#swh-origin-save-requests-list-tab').click(); + cy.get('#swh-save-requests-user-filter').should('not.exist'); + }); + + it('should show user requests filter checkbox for authenticated users', function() { + cy.userLogin(); + cy.visit(url); + cy.get('#swh-origin-save-requests-list-tab').click(); + cy.get('#swh-save-requests-user-filter').should('exist'); + }); + + it('should show only user requests when filter is activated', function() { + cy.intercept('POST', '/api/1/origin/save/**') + .as('saveRequest'); + + const originAnonymousUser = 'https://some.git.server/project/'; + const originAuthUser = 'https://other.git.server/project/'; + + // anonymous user creates a save request + makeOriginSaveRequest('git', originAnonymousUser); + cy.wait('@saveRequest'); + + // authenticated user creates another save request + cy.userLogin(); + cy.visit(url); + makeOriginSaveRequest('git', originAuthUser); + cy.wait('@saveRequest'); + + // user requests filter checkbox should be in the DOM + cy.get('#swh-origin-save-requests-list-tab').click(); + cy.get('#swh-save-requests-user-filter').should('exist'); + + // check unfiltered user requests + cy.get('tbody tr').then(rows => { + expect(rows.length).to.eq(2); + expect($(rows[0].cells[2]).text()).to.contain(originAuthUser); + expect($(rows[1].cells[2]).text()).to.contain(originAnonymousUser); + }); + + // activate filter and check filtered user requests + cy.get('#swh-save-requests-user-filter') + .click({force: true}); + cy.get('tbody tr').then(rows => { + expect(rows.length).to.eq(1); + expect($(rows[0].cells[2]).text()).to.contain(originAuthUser); + }); + + // deactivate filter and check unfiltered user requests + cy.get('#swh-save-requests-user-filter') + .click({force: true}); + cy.get('tbody tr').then(rows => { + expect(rows.length).to.eq(2); + }); + + }); + }); diff --git a/cypress/support/index.js b/cypress/support/index.js index 9d789da8..b5652aef 100644 --- a/cypress/support/index.js +++ b/cypress/support/index.js @@ -1,145 +1,153 @@ /** * Copyright (C) 2019-2020 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ import '@cypress/code-coverage/support'; import {httpGetJson} from '../utils'; Cypress.Screenshot.defaults({ screenshotOnRunFailure: false }); Cypress.Commands.add('xhrShouldBeCalled', (alias, timesCalled) => { const testRoutes = cy.state('routes'); const aliasRoute = Cypress._.find(testRoutes, {alias}); expect(Object.keys(aliasRoute.requests || {})).to.have.length(timesCalled); }); -Cypress.Commands.add('adminLogin', () => { +function loginUser(username, password) { const url = '/admin/login/'; return cy.request({ url: url, method: 'GET' }).then(() => { cy.getCookie('sessionid').should('not.exist'); cy.getCookie('csrftoken').its('value').then((token) => { cy.request({ url: url, method: 'POST', form: true, followRedirect: false, body: { - username: 'admin', - password: 'admin', + username: username, + password: password, csrfmiddlewaretoken: token } }).then(() => { cy.getCookie('sessionid').should('exist'); return cy.getCookie('csrftoken').its('value'); }); }); }); +} + +Cypress.Commands.add('adminLogin', () => { + return loginUser('admin', 'admin'); +}); + +Cypress.Commands.add('userLogin', () => { + return loginUser('user', 'user'); }); before(function() { this.unarchivedRepo = { url: 'https://github.com/SoftwareHeritage/swh-web', type: 'git', revision: '7bf1b2f489f16253527807baead7957ca9e8adde', snapshot: 'd9829223095de4bb529790de8ba4e4813e38672d', rootDirectory: '7d887d96c0047a77e2e8c4ee9bb1528463677663', content: [{ sha1git: 'b203ec39300e5b7e97b6e20986183cbd0b797859' }] }; this.origin = [{ url: 'https://github.com/memononen/libtess2', type: 'git', content: [{ path: 'Source/tess.h' }, { path: 'premake4.lua' }], directory: [{ path: 'Source', id: 'cd19126d815470b28919d64b2a8e6a3e37f900dd' }], revisions: [], invalidSubDir: 'Source1' }, { url: 'https://github.com/wcoder/highlightjs-line-numbers.js', type: 'git', content: [{ path: 'src/highlightjs-line-numbers.js' }], directory: [], revisions: ['1c480a4573d2a003fc2630c21c2b25829de49972'], release: { name: 'v2.6.0', id: '6877028d6e5412780517d0bfa81f07f6c51abb41', directory: '5b61d50ef35ca9a4618a3572bde947b8cccf71ad' } }]; const getMetadataForOrigin = async originUrl => { const originVisitsApiUrl = this.Urls.api_1_origin_visits(originUrl); const originVisits = await httpGetJson(originVisitsApiUrl); const lastVisit = originVisits[0]; const snapshotApiUrl = this.Urls.api_1_snapshot(lastVisit.snapshot); const lastOriginSnapshot = await httpGetJson(snapshotApiUrl); let revision = lastOriginSnapshot.branches.HEAD.target; if (lastOriginSnapshot.branches.HEAD.target_type === 'alias') { revision = lastOriginSnapshot.branches[revision].target; } const revisionApiUrl = this.Urls.api_1_revision(revision); const lastOriginHeadRevision = await httpGetJson(revisionApiUrl); return { 'directory': lastOriginHeadRevision.directory, 'revision': lastOriginHeadRevision.id, 'snapshot': lastOriginSnapshot.id }; }; cy.visit('/').window().then(async win => { this.Urls = win.Urls; for (let origin of this.origin) { const metadata = await getMetadataForOrigin(origin.url); const directoryApiUrl = this.Urls.api_1_directory(metadata.directory); origin.dirContent = await httpGetJson(directoryApiUrl); origin.rootDirectory = metadata.directory; origin.revisions.push(metadata.revision); origin.snapshot = metadata.snapshot; for (let content of origin.content) { const contentPathApiUrl = this.Urls.api_1_directory(origin.rootDirectory, content.path); const contentMetaData = await httpGetJson(contentPathApiUrl); content.name = contentMetaData.name.split('/').slice(-1)[0]; content.sha1git = contentMetaData.target; content.directory = contentMetaData.dir_id; content.rawFilePath = this.Urls.browse_content_raw(`sha1_git:${content.sha1git}`) + `?filename=${encodeURIComponent(content.name)}`; cy.request(content.rawFilePath) .then((response) => { const fileText = response.body; const fileLines = fileText.split('\n'); content.numberLines = fileLines.length; // If last line is empty its not shown if (!fileLines[content.numberLines - 1]) content.numberLines -= 1; }); } } }); }); diff --git a/swh/web/common/migrations/0011_saveoriginrequest_user_ids.py b/swh/web/common/migrations/0011_saveoriginrequest_user_ids.py new file mode 100644 index 00000000..f08b181c --- /dev/null +++ b/swh/web/common/migrations/0011_saveoriginrequest_user_ids.py @@ -0,0 +1,22 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("swh_web_common", "0010_saveoriginrequest_user_id"), + ] + + operations = [ + migrations.RemoveField(model_name="saveoriginrequest", name="user_id",), + migrations.AddField( + model_name="saveoriginrequest", + name="user_ids", + field=models.TextField(null=True), + ), + ] diff --git a/swh/web/common/models.py b/swh/web/common/models.py index f69fc60f..380ecd77 100644 --- a/swh/web/common/models.py +++ b/swh/web/common/models.py @@ -1,134 +1,133 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.db import models from swh.web.common.typing import SaveOriginRequestInfo class SaveAuthorizedOrigin(models.Model): """ Model table holding origin urls authorized to be loaded into the archive. """ url = models.CharField(max_length=200, null=False) class Meta: app_label = "swh_web_common" db_table = "save_authorized_origin" indexes = [models.Index(fields=["url"])] def __str__(self): return self.url class SaveUnauthorizedOrigin(models.Model): """ Model table holding origin urls not authorized to be loaded into the archive. """ url = models.CharField(max_length=200, null=False) class Meta: app_label = "swh_web_common" db_table = "save_unauthorized_origin" indexes = [models.Index(fields=["url"])] def __str__(self): return self.url SAVE_REQUEST_ACCEPTED = "accepted" SAVE_REQUEST_REJECTED = "rejected" SAVE_REQUEST_PENDING = "pending" SAVE_REQUEST_STATUS = [ (SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_ACCEPTED), (SAVE_REQUEST_REJECTED, SAVE_REQUEST_REJECTED), (SAVE_REQUEST_PENDING, SAVE_REQUEST_PENDING), ] SAVE_TASK_NOT_CREATED = "not created" SAVE_TASK_NOT_YET_SCHEDULED = "not yet scheduled" SAVE_TASK_SCHEDULED = "scheduled" SAVE_TASK_SUCCEEDED = "succeeded" SAVE_TASK_FAILED = "failed" SAVE_TASK_RUNNING = "running" SAVE_TASK_STATUS = [ (SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_CREATED), (SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED), (SAVE_TASK_SCHEDULED, SAVE_TASK_SCHEDULED), (SAVE_TASK_SUCCEEDED, SAVE_TASK_SUCCEEDED), (SAVE_TASK_FAILED, SAVE_TASK_FAILED), (SAVE_TASK_RUNNING, SAVE_TASK_RUNNING), ] VISIT_STATUS_CREATED = "created" VISIT_STATUS_ONGOING = "ongoing" VISIT_STATUS_FULL = "full" VISIT_STATUS_PARTIAL = "partial" VISIT_STATUS_NOT_FOUND = "not_found" VISIT_STATUS_FAILED = "failed" VISIT_STATUSES = [ (VISIT_STATUS_CREATED, VISIT_STATUS_CREATED), (VISIT_STATUS_ONGOING, VISIT_STATUS_ONGOING), (VISIT_STATUS_FULL, VISIT_STATUS_FULL), (VISIT_STATUS_PARTIAL, VISIT_STATUS_PARTIAL), (VISIT_STATUS_NOT_FOUND, VISIT_STATUS_NOT_FOUND), (VISIT_STATUS_FAILED, VISIT_STATUS_FAILED), ] class SaveOriginRequest(models.Model): """ Model table holding all the save origin requests issued by users. """ id = models.BigAutoField(primary_key=True) request_date = models.DateTimeField(auto_now_add=True) visit_type = models.CharField(max_length=200, null=False) visit_status = models.TextField(choices=VISIT_STATUSES, null=True) origin_url = models.CharField(max_length=200, null=False) status = models.TextField(choices=SAVE_REQUEST_STATUS, default=SAVE_REQUEST_PENDING) loading_task_id = models.IntegerField(default=-1) visit_date = models.DateTimeField(null=True) loading_task_status = models.TextField( choices=SAVE_TASK_STATUS, default=SAVE_TASK_NOT_CREATED ) - # user integer ids computed from keycloak subs are too large - # to be stored in SQLite so we store them as strings - user_id = models.CharField(max_length=200, null=True) + # store ids of users that submitted the request as string list + user_ids = models.TextField(null=True) class Meta: app_label = "swh_web_common" db_table = "save_origin_request" ordering = ["-id"] indexes = [models.Index(fields=["origin_url", "status"])] def to_dict(self) -> SaveOriginRequestInfo: """Map the request save model object to a json serializable dict. Returns: The corresponding SaveOriginRequetsInfo json serializable dict. """ visit_date = self.visit_date return SaveOriginRequestInfo( id=self.id, origin_url=self.origin_url, visit_type=self.visit_type, save_request_date=self.request_date.isoformat(), save_request_status=self.status, save_task_status=self.loading_task_status, visit_status=self.visit_status, visit_date=visit_date.isoformat() if visit_date else None, loading_task_id=self.loading_task_id, ) def __str__(self) -> str: return str(self.to_dict()) diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py index 4fd27612..bf92262f 100644 --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -1,813 +1,819 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from bisect import bisect_right from datetime import datetime, timedelta, timezone from itertools import product import json import logging from typing import Any, Dict, List, Optional, Tuple from prometheus_client import Gauge import requests import sentry_sdk from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator from django.db.models import QuerySet from django.utils.html import escape from swh.scheduler.utils import create_oneshot_task_dict from swh.web import config from swh.web.common import archive from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc from swh.web.common.models import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_FAILED, SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_RUNNING, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) from swh.web.common.origin_visits import get_origin_visits from swh.web.common.typing import ( OriginExistenceCheckInfo, OriginInfo, SaveOriginRequestInfo, ) from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc scheduler = config.scheduler() logger = logging.getLogger(__name__) def get_origin_save_authorized_urls() -> List[str]: """ Get the list of origin url prefixes authorized to be immediately loaded into the archive (whitelist). Returns: list: The list of authorized origin url prefix """ return [origin.url for origin in SaveAuthorizedOrigin.objects.all()] def get_origin_save_unauthorized_urls() -> List[str]: """ Get the list of origin url prefixes forbidden to be loaded into the archive (blacklist). Returns: list: the list of unauthorized origin url prefix """ return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] def can_save_origin(origin_url: str, bypass_pending_review: bool = False) -> str: """ Check if a software origin can be saved into the archive. Based on the origin url, the save request will be either: * immediately accepted if the url is whitelisted * rejected if the url is blacklisted * put in pending state for manual review otherwise Args: origin_url (str): the software origin url to check Returns: str: the origin save request status, either **accepted**, **rejected** or **pending** """ # origin url may be blacklisted for url_prefix in get_origin_save_unauthorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_REJECTED # if the origin url is in the white list, it can be immediately saved for url_prefix in get_origin_save_authorized_urls(): if origin_url.startswith(url_prefix): return SAVE_REQUEST_ACCEPTED # otherwise, the origin url needs to be manually verified if the user # that submitted it does not have special permission if bypass_pending_review: # mark the origin URL as trusted in that case SaveAuthorizedOrigin.objects.get_or_create(url=origin_url) return SAVE_REQUEST_ACCEPTED else: return SAVE_REQUEST_PENDING # map visit type to scheduler task # TODO: do not hardcode the task name here (T1157) _visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"} # map scheduler task status to origin save status _save_task_status = { "next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED, "next_run_scheduled": SAVE_TASK_SCHEDULED, "completed": SAVE_TASK_SUCCEEDED, "disabled": SAVE_TASK_FAILED, } # map scheduler task_run status to origin save status _save_task_run_status = { "scheduled": SAVE_TASK_SCHEDULED, "started": SAVE_TASK_RUNNING, "eventful": SAVE_TASK_SUCCEEDED, "uneventful": SAVE_TASK_SUCCEEDED, "failed": SAVE_TASK_FAILED, "permfailed": SAVE_TASK_FAILED, "lost": SAVE_TASK_FAILED, } def get_savable_visit_types() -> List[str]: """ Get the list of visit types that can be performed through a save request. Returns: list: the list of saveable visit types """ return sorted(list(_visit_type_task.keys())) def _check_visit_type_savable(visit_type: str) -> None: allowed_visit_types = ", ".join(get_savable_visit_types()) if visit_type not in _visit_type_task: raise BadInputExc( "Visit of type %s can not be saved! " "Allowed types are the following: %s" % (visit_type, allowed_visit_types) ) _validate_url = URLValidator(schemes=["http", "https", "svn", "git"]) def _check_origin_url_valid(origin_url: str) -> None: try: _validate_url(origin_url) except ValidationError: raise BadInputExc( "The provided origin url (%s) is not valid!" % escape(origin_url) ) def origin_exists(origin_url: str) -> OriginExistenceCheckInfo: """Check the origin url for existence. If it exists, extract some more useful information on the origin. """ resp = requests.head(origin_url) exists = resp.ok content_length: Optional[int] = None last_modified: Optional[str] = None if exists: size_ = resp.headers.get("Content-Length") content_length = int(size_) if size_ else None last_modified = resp.headers.get("Last-Modified") return OriginExistenceCheckInfo( origin_url=origin_url, exists=exists, last_modified=last_modified, content_length=content_length, ) def _check_origin_exists(origin_url: str) -> None: """Ensure the origin exists, if not raise an explicit message.""" check = origin_exists(origin_url) if not check["exists"]: raise BadInputExc( f"The provided origin url ({escape(origin_url)}) does not exist!" ) def _get_visit_info_for_save_request( save_request: SaveOriginRequest, ) -> Tuple[Optional[datetime], Optional[str]]: """Retrieve visit information out of a save request Args: save_request: Input save origin request to retrieve information for. Returns: Tuple of (visit date, optional visit status) for such save request origin """ visit_date = None visit_status = None time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # stop trying to find a visit date one month after save request submission # as those requests to storage are expensive and associated loading task # surely ended up with errors if time_delta.days <= 30: try: origin_info = archive.lookup_origin(OriginInfo(url=save_request.origin_url)) origin_visits = get_origin_visits(origin_info) visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits] i = bisect_right(visit_dates, save_request.request_date) if i != len(visit_dates): visit_date = visit_dates[i] visit_status = origin_visits[i]["status"] if visit_status not in ("full", "partial", "not_found"): visit_date = None except Exception as exc: sentry_sdk.capture_exception(exc) return visit_date, visit_status def _check_visit_update_status( save_request: SaveOriginRequest, save_task_status: str ) -> Tuple[Optional[datetime], str]: """Given a save request and a save task status, determine whether a save request was successful or failed. Args: save_request: Input save origin request to retrieve information for. Returns: Tuple of (optional visit date, save task status) for such save request origin """ visit_date, visit_status = _get_visit_info_for_save_request(save_request) save_request.visit_date = visit_date save_request.visit_status = visit_status if visit_date and visit_status in ("full", "partial"): # visit has been performed, mark the saving task as succeeded save_task_status = SAVE_TASK_SUCCEEDED elif visit_status in ("created", "ongoing"): # visit is currently running save_task_status = SAVE_TASK_RUNNING elif visit_status in ("not_found", "failed"): save_task_status = SAVE_TASK_FAILED else: time_now = datetime.now(tz=timezone.utc) time_delta = time_now - save_request.request_date # consider the task as failed if it is still in scheduled state # 30 days after its submission if time_delta.days > 30: save_task_status = SAVE_TASK_FAILED return visit_date, save_task_status def _update_save_request_info( save_request: SaveOriginRequest, task: Optional[Dict[str, Any]] = None, task_run: Optional[Dict[str, Any]] = None, ) -> SaveOriginRequestInfo: """Update save request information out of task and task_run information. Args: save_request: Save request task: Associated scheduler task information about the save request task_run: Most recent run occurrence of the associated task Returns: Summary of the save request information updated. """ must_save = False visit_date = save_request.visit_date # save task still in scheduler db if task: save_task_status = _save_task_status[task["status"]] if task_run: save_task_status = _save_task_run_status[task_run["status"]] # Consider request from which a visit date has already been found # as succeeded to avoid retrieving it again if save_task_status == SAVE_TASK_SCHEDULED and visit_date: save_task_status = SAVE_TASK_SUCCEEDED if ( save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) and not visit_date ): visit_date, visit_status = _get_visit_info_for_save_request(save_request) save_request.visit_date = visit_date save_request.visit_status = visit_status if visit_status in ("failed", "not_found"): save_task_status = SAVE_TASK_FAILED must_save = True # Check tasks still marked as scheduled / not yet scheduled if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( save_request, save_task_status ) # save task may have been archived else: save_task_status = save_request.loading_task_status if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED): visit_date, save_task_status = _check_visit_update_status( save_request, save_task_status ) else: save_task_status = save_request.loading_task_status if ( # avoid to override final loading task status when already found # as visit status is no longer checked once a visit date has been found save_request.loading_task_status not in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED) and save_request.loading_task_status != save_task_status ): save_request.loading_task_status = save_task_status must_save = True if must_save: save_request.save() return save_request.to_dict() def create_save_origin_request( visit_type: str, origin_url: str, bypass_pending_review: bool = False, user_id: Optional[int] = None, ) -> SaveOriginRequestInfo: """ Create a loading task to save a software origin into the archive. This function aims to create a software origin loading task trough the use of the swh-scheduler component. First, some checks are performed to see if the visit type and origin url are valid but also if the the save request can be accepted. If those checks passed, the loading task is then created. Otherwise, the save request is put in pending or rejected state. All the submitted save requests are logged into the swh-web database to keep track of them. Args: visit_type: the type of visit to perform (e.g git, hg, svn, ...) origin_url: the url of the origin to save Raises: BadInputExc: the visit type or origin url is invalid or inexistent ForbiddenExc: the provided origin url is blacklisted Returns: dict: A dict describing the save request with the following keys: * **visit_type**: the type of visit to perform * **origin_url**: the url of the origin * **save_request_date**: the date the request was submitted * **save_request_status**: the request status, either **accepted**, **rejected** or **pending** * **save_task_status**: the origin loading task status, either **not created**, **not yet scheduled**, **scheduled**, **succeed** or **failed** """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) # if all checks passed so far, we can try and save the origin save_request_status = can_save_origin(origin_url, bypass_pending_review) task = None # if the origin save request is accepted, create a scheduler # task to load it into the archive if save_request_status == SAVE_REQUEST_ACCEPTED: # create a task with high priority kwargs = { "priority": "high", "url": origin_url, } sor = None # get list of previously sumitted save requests current_sors = list( SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ) ) can_create_task = False # if no save requests previously submitted, create the scheduler task if not current_sors: can_create_task = True else: # get the latest submitted save request sor = current_sors[0] # if it was in pending state, we need to create the scheduler task # and update the save request info in the database if sor.status == SAVE_REQUEST_PENDING: can_create_task = True # a task has already been created to load the origin elif sor.loading_task_id != -1: # get the scheduler task and its status tasks = scheduler.get_tasks([sor.loading_task_id]) task = tasks[0] if tasks else None task_runs = scheduler.get_task_runs([sor.loading_task_id]) task_run = task_runs[0] if task_runs else None save_request_info = _update_save_request_info(sor, task, task_run) task_status = save_request_info["save_task_status"] # create a new scheduler task only if the previous one has been # already executed if ( task_status == SAVE_TASK_FAILED or task_status == SAVE_TASK_SUCCEEDED ): can_create_task = True sor = None else: can_create_task = False if can_create_task: # effectively create the scheduler task task_dict = create_oneshot_task_dict(_visit_type_task[visit_type], **kwargs) task = scheduler.create_tasks([task_dict])[0] # pending save request has been accepted if sor: sor.status = SAVE_REQUEST_ACCEPTED sor.loading_task_id = task["id"] sor.save() else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, loading_task_id=task["id"], - user_id=str(user_id) if user_id else None, + user_ids=f'"{user_id}"' if user_id else None, ) # save request must be manually reviewed for acceptation elif save_request_status == SAVE_REQUEST_PENDING: # check if there is already such a save request already submitted, # no need to add it to the database in that case try: sor = SaveOriginRequest.objects.get( visit_type=visit_type, origin_url=origin_url, status=save_request_status ) + user_ids = sor.user_ids if sor.user_ids is not None else "" + if user_id is not None and f'"{user_id}"' not in user_ids: + # update user ids list + sor.user_ids = f'{sor.user_ids},"{user_id}"' + sor.save() + # if not add it to the database except ObjectDoesNotExist: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, - user_id=str(user_id) if user_id else None, + user_ids=f'"{user_id}"' if user_id else None, ) # origin can not be saved as its url is blacklisted, # log the request to the database anyway else: sor = SaveOriginRequest.objects.create( visit_type=visit_type, origin_url=origin_url, status=save_request_status, - user_id=str(user_id) if user_id else None, + user_ids=f'"{user_id}"' if user_id else None, ) if save_request_status == SAVE_REQUEST_REJECTED: raise ForbiddenExc( ( 'The "save code now" request has been rejected ' "because the provided origin url is blacklisted." ) ) assert sor is not None return _update_save_request_info(sor, task) def update_save_origin_requests_from_queryset( requests_queryset: QuerySet, ) -> List[SaveOriginRequestInfo]: """Update all save requests from a SaveOriginRequest queryset, update their status in db and return the list of impacted save_requests. Args: requests_queryset: input SaveOriginRequest queryset Returns: list: A list of save origin request info dicts as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ task_ids = [] for sor in requests_queryset: task_ids.append(sor.loading_task_id) save_requests = [] if task_ids: tasks = scheduler.get_tasks(task_ids) tasks = {task["id"]: task for task in tasks} task_runs = scheduler.get_task_runs(tasks) task_runs = {task_run["task"]: task_run for task_run in task_runs} for sor in requests_queryset: sr_dict = _update_save_request_info( sor, tasks.get(sor.loading_task_id), task_runs.get(sor.loading_task_id), ) save_requests.append(sr_dict) return save_requests def refresh_save_origin_request_statuses() -> List[SaveOriginRequestInfo]: """Refresh non-terminal save origin requests (SOR) in the backend. Non-terminal SOR are requests whose status is **accepted** and their task status are either **created**, **not yet scheduled**, **scheduled** or **running**. This shall compute this list of SOR, checks their status in the scheduler and optionally elasticsearch for their current status. Then update those in db. Finally, this returns the refreshed information on those SOR. """ non_terminal_statuses = ( SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_RUNNING, SAVE_TASK_SCHEDULED, ) save_requests = SaveOriginRequest.objects.filter( status=SAVE_REQUEST_ACCEPTED, loading_task_status__in=non_terminal_statuses ) # update save request statuses return ( update_save_origin_requests_from_queryset(save_requests) if save_requests.count() > 0 else [] ) def get_save_origin_requests( visit_type: str, origin_url: str ) -> List[SaveOriginRequestInfo]: """ Get all save requests for a given software origin. Args: visit_type: the type of visit origin_url: the url of the origin Raises: BadInputExc: the visit type or origin url is invalid swh.web.common.exc.NotFoundExc: no save requests can be found for the given origin Returns: list: A list of save origin requests dict as described in :func:`swh.web.common.origin_save.create_save_origin_request` """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) sors = SaveOriginRequest.objects.filter( visit_type=visit_type, origin_url=origin_url ) if sors.count() == 0: raise NotFoundExc( f"No save requests found for visit of type {visit_type} " f"on origin with url {origin_url}." ) return update_save_origin_requests_from_queryset(sors) def get_save_origin_task_info( save_request_id: int, full_info: bool = True ) -> Dict[str, Any]: """ Get detailed information about an accepted save origin request and its associated loading task. If the associated loading task info is archived and removed from the scheduler database, returns an empty dictionary. Args: save_request_id: identifier of a save origin request full_info: whether to return detailed info for staff users Returns: A dictionary with the following keys: - **type**: loading task type - **arguments**: loading task arguments - **id**: loading task database identifier - **backend_id**: loading task celery identifier - **scheduled**: loading task scheduling date - **ended**: loading task termination date - **status**: loading task execution status - **visit_status**: Actual visit status Depending on the availability of the task logs in the elasticsearch cluster of Software Heritage, the returned dictionary may also contain the following keys: - **name**: associated celery task name - **message**: relevant log message from task execution - **duration**: task execution time (only if it succeeded) - **worker**: name of the worker that executed the task """ try: save_request = SaveOriginRequest.objects.get(id=save_request_id) except ObjectDoesNotExist: return {} task = scheduler.get_tasks([save_request.loading_task_id]) task = task[0] if task else None if task is None: return {} task_run = scheduler.get_task_runs([task["id"]]) task_run = task_run[0] if task_run else None if task_run is None: return {} task_run["type"] = task["type"] task_run["arguments"] = task["arguments"] task_run["id"] = task_run["task"] del task_run["task"] del task_run["metadata"] # Enrich the task run with the loading visit status task_run["visit_status"] = save_request.visit_status es_workers_index_url = config.get_config()["es_workers_index_url"] if not es_workers_index_url: return task_run es_workers_index_url += "/_search" if save_request.visit_date: min_ts = save_request.visit_date max_ts = min_ts + timedelta(days=7) else: min_ts = save_request.request_date max_ts = min_ts + timedelta(days=30) min_ts_unix = int(min_ts.timestamp()) * 1000 max_ts_unix = int(max_ts.timestamp()) * 1000 save_task_status = _save_task_status[task["status"]] priority = "3" if save_task_status == SAVE_TASK_FAILED else "6" query = { "bool": { "must": [ {"match_phrase": {"priority": {"query": priority}}}, {"match_phrase": {"swh_task_id": {"query": task_run["backend_id"]}}}, { "range": { "@timestamp": { "gte": min_ts_unix, "lte": max_ts_unix, "format": "epoch_millis", } } }, ] } } try: response = requests.post( es_workers_index_url, json={"query": query, "sort": ["@timestamp"]}, timeout=30, ) results = json.loads(response.text) if results["hits"]["total"]["value"] >= 1: task_run_info = results["hits"]["hits"][-1]["_source"] if "swh_logging_args_runtime" in task_run_info: duration = task_run_info["swh_logging_args_runtime"] task_run["duration"] = duration if "message" in task_run_info: task_run["message"] = task_run_info["message"] if "swh_logging_args_name" in task_run_info: task_run["name"] = task_run_info["swh_logging_args_name"] elif "swh_task_name" in task_run_info: task_run["name"] = task_run_info["swh_task_name"] if "hostname" in task_run_info: task_run["worker"] = task_run_info["hostname"] elif "host" in task_run_info: task_run["worker"] = task_run_info["host"] except Exception as exc: logger.warning("Request to Elasticsearch failed\n%s", exc) sentry_sdk.capture_exception(exc) if not full_info: for field in ("id", "backend_id", "worker"): # remove some staff only fields task_run.pop(field, None) if "message" in task_run and "Loading failure" in task_run["message"]: # hide traceback for non staff users, only display exception message_lines = task_run["message"].split("\n") message = "" for line in message_lines: if line.startswith("Traceback"): break message += f"{line}\n" message += message_lines[-1] task_run["message"] = message return task_run SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests" _submitted_save_requests_gauge = Gauge( name=SUBMITTED_SAVE_REQUESTS_METRIC, documentation="Number of submitted origin save requests", labelnames=["status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests" _accepted_save_requests_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_METRIC, documentation="Number of accepted origin save requests", labelnames=["load_task_status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) # Metric on the delay of save code now request per status and visit_type. This is the # time difference between the save code now is requested and the time it got ingested. ACCEPTED_SAVE_REQUESTS_DELAY_METRIC = "swh_web_save_requests_delay_seconds" _accepted_save_requests_delay_gauge = Gauge( name=ACCEPTED_SAVE_REQUESTS_DELAY_METRIC, documentation="Save Requests Duration", labelnames=["load_task_status", "visit_type"], registry=SWH_WEB_METRICS_REGISTRY, ) def compute_save_requests_metrics() -> None: """Compute Prometheus metrics related to origin save requests: - Number of submitted origin save requests - Number of accepted origin save requests - Save Code Now requests delay between request time and actual time of ingestion """ request_statuses = ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_REJECTED, SAVE_REQUEST_PENDING, ) load_task_statuses = ( SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED, SAVE_TASK_RUNNING, ) visit_types = get_savable_visit_types() labels_set = product(request_statuses, visit_types) for labels in labels_set: _submitted_save_requests_gauge.labels(*labels).set(0) labels_set = product(load_task_statuses, visit_types) for labels in labels_set: _accepted_save_requests_gauge.labels(*labels).set(0) duration_load_task_statuses = ( SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED, ) for labels in product(duration_load_task_statuses, visit_types): _accepted_save_requests_delay_gauge.labels(*labels).set(0) for sor in SaveOriginRequest.objects.all(): if sor.status == SAVE_REQUEST_ACCEPTED: _accepted_save_requests_gauge.labels( load_task_status=sor.loading_task_status, visit_type=sor.visit_type, ).inc() _submitted_save_requests_gauge.labels( status=sor.status, visit_type=sor.visit_type ).inc() if ( sor.loading_task_status in (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED) and sor.visit_date is not None and sor.request_date is not None ): delay = sor.visit_date.timestamp() - sor.request_date.timestamp() _accepted_save_requests_delay_gauge.labels( load_task_status=sor.loading_task_status, visit_type=sor.visit_type, ).inc(delay) diff --git a/swh/web/misc/origin_save.py b/swh/web/misc/origin_save.py index 993d7b41..70b4b3a7 100644 --- a/swh/web/misc/origin_save.py +++ b/swh/web/misc/origin_save.py @@ -1,93 +1,99 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from django.core.paginator import Paginator from django.db.models import Q from django.http import JsonResponse from django.shortcuts import render from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import ( get_savable_visit_types, get_save_origin_task_info, ) def _origin_save_view(request): return render( request, "misc/origin-save.html", {"heading": ("Request the saving of a software origin into the archive")}, ) def _visit_save_types_list(request): visit_types = get_savable_visit_types() return JsonResponse(visit_types, safe=False) def _origin_save_requests_list(request, status): if status != "all": save_requests = SaveOriginRequest.objects.filter(status=status) else: save_requests = SaveOriginRequest.objects.all() table_data = {} table_data["recordsTotal"] = save_requests.count() table_data["draw"] = int(request.GET["draw"]) search_value = request.GET["search[value]"] column_order = request.GET["order[0][column]"] field_order = request.GET["columns[%s][name]" % column_order] order_dir = request.GET["order[0][dir]"] if order_dir == "desc": field_order = "-" + field_order save_requests = save_requests.order_by(field_order) length = int(request.GET["length"]) page = int(request.GET["start"]) / length + 1 if search_value: save_requests = save_requests.filter( Q(status__icontains=search_value) | Q(loading_task_status__icontains=search_value) | Q(visit_type__icontains=search_value) | Q(origin_url__icontains=search_value) ) + if ( + int(request.GET.get("user_requests_only", "0")) + and request.user.is_authenticated + ): + save_requests = save_requests.filter(user_ids__contains=f'"{request.user.id}"') + table_data["recordsFiltered"] = save_requests.count() paginator = Paginator(save_requests, length) table_data["data"] = [sor.to_dict() for sor in paginator.page(page).object_list] return JsonResponse(table_data) def _save_origin_task_info(request, save_request_id): request_info = get_save_origin_task_info( save_request_id, full_info=request.user.is_staff ) for date_field in ("scheduled", "started", "ended"): if date_field in request_info and request_info[date_field] is not None: request_info[date_field] = request_info[date_field].isoformat() return JsonResponse(request_info) urlpatterns = [ url(r"^save/$", _origin_save_view, name="origin-save"), url(r"^save/types/list/$", _visit_save_types_list, name="origin-save-types-list"), url( r"^save/requests/list/(?P.+)/$", _origin_save_requests_list, name="origin-save-requests-list", ), url( r"^save/task/info/(?P.+)/", _save_origin_task_info, name="origin-save-task-info", ), ] diff --git a/swh/web/templates/misc/origin-save.html b/swh/web/templates/misc/origin-save.html index 3f85cc21..af4a7fa9 100644 --- a/swh/web/templates/misc/origin-save.html +++ b/swh/web/templates/misc/origin-save.html @@ -1,126 +1,129 @@ {% extends "../layout.html" %} {% comment %} Copyright (C) 2018-2019 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load render_bundle from webpack_loader %} {% load static %} {% block title %}{{ heading }} – Software Heritage archive{% endblock %} {% block header %} {% render_bundle 'save' %} {% endblock %} {% block navbar-content %}

Save code now

{% endblock %} {% block content %}

You can contribute to extend the content of the Software Heritage archive by submitting an origin save request. To do so, fill the required info in the form below:

{% csrf_token %}
The origin type must be specified
The origin url is not valid or does not reference a code repository

A "Save code now" request takes the following parameters:

  • Origin type: the type of version control system the software origin is using. Currently, the supported types are:
  • Origin url: the url of the remote repository for the software origin.
    In order to avoid saving errors from Software Heritage, you should provide the clone/checkout url as given by the provider hosting the software origin.
    It can easily be found in the web interface used to browse the software origin.
    For instance, if you want to save a git origin into the archive, you should check that the command $ git clone <origin_url>
    does not return an error before submitting a request.

Once submitted, your save request can either be:

  • accepted: a visit to the provided origin will then be scheduled by Software Heritage in order to load its content into the archive as soon as possible
  • rejected: the provided origin url is blacklisted and no visit will be scheduled
  • put in pending state: a manual review will then be performed in order to determine if the origin can be safely loaded or not into the archive

Once a save request has been accepted, you can follow its current status in the submitted save requests list. +
+ If you submitted requests while authenticated, you will be able + to only display your own requests.

Date Type Url Request Status Info

{% endblock %} \ No newline at end of file diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py index 5034beb2..9573d33d 100644 --- a/swh/web/tests/api/views/test_origin_save.py +++ b/swh/web/tests/api/views/test_origin_save.py @@ -1,511 +1,530 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta import pytest +from django.contrib.auth.models import User from django.core.exceptions import ObjectDoesNotExist from django.utils import timezone from swh.web.auth.utils import SWH_AMBASSADOR_PERMISSION from swh.web.common.models import ( SAVE_REQUEST_ACCEPTED, SAVE_REQUEST_PENDING, SAVE_REQUEST_REJECTED, SAVE_TASK_FAILED, SAVE_TASK_NOT_CREATED, SAVE_TASK_NOT_YET_SCHEDULED, SAVE_TASK_SCHEDULED, SAVE_TASK_SUCCEEDED, VISIT_STATUS_FAILED, VISIT_STATUS_FULL, SaveAuthorizedOrigin, SaveOriginRequest, SaveUnauthorizedOrigin, ) from swh.web.common.typing import OriginExistenceCheckInfo from swh.web.common.utils import reverse from swh.web.settings.tests import save_origin_rate_post from swh.web.tests.utils import ( check_api_get_responses, check_api_post_response, check_api_post_responses, ) pytestmark = pytest.mark.django_db @pytest.fixture(autouse=True) def populated_db(): SaveAuthorizedOrigin.objects.create(url="https://github.com/"), SaveAuthorizedOrigin.objects.create(url="https://gitlab.com/"), SaveUnauthorizedOrigin.objects.create(url="https://github.com/user/illegal_repo") SaveUnauthorizedOrigin.objects.create(url="https://gitlab.com/user_to_exclude") def test_invalid_visit_type(api_client): url = reverse( "api-1-save-origin", url_args={ "visit_type": "foo", "origin_url": "https://github.com/torvalds/linux", }, ) check_api_get_responses(api_client, url, status_code=400) def test_invalid_origin_url(api_client): url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": "bar"} ) check_api_get_responses(api_client, url, status_code=400) def check_created_save_request_status( api_client, mocker, origin_url, expected_request_status, scheduler_task_status=None, scheduler_task_run_status=None, expected_task_status=None, visit_date=None, ): mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") mock_origin_exists = mocker.patch("swh.web.common.origin_save.origin_exists") mock_origin_exists.return_value = OriginExistenceCheckInfo( origin_url=origin_url, exists=True, last_modified=None, content_length=None ) if scheduler_task_status is None: mock_scheduler.get_tasks.return_value = [] else: mock_scheduler.get_tasks.return_value = [ { "priority": "high", "policy": "oneshot", "type": "load-git", "arguments": {"kwargs": {"repo_url": origin_url}, "args": []}, "status": scheduler_task_status, "id": 1, } ] if scheduler_task_run_status is None: mock_scheduler.get_task_runs.return_value = [] else: mock_scheduler.get_task_runs.return_value = [ { "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205", "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5), "id": 1, "metadata": {}, "scheduled": datetime.now(tz=timezone.utc), "started": None, "status": scheduler_task_run_status, "task": 1, } ] mock_scheduler.create_tasks.return_value = [ { "priority": "high", "policy": "oneshot", "type": "load-git", "arguments": {"kwargs": {"repo_url": origin_url}, "args": []}, "status": "next_run_not_scheduled", "id": 1, } ] url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} ) mock_visit_date = mocker.patch( ("swh.web.common.origin_save._get_visit_info_for_save_request") ) mock_visit_date.return_value = (visit_date, None) if expected_request_status != SAVE_REQUEST_REJECTED: response = check_api_post_responses(api_client, url, data=None, status_code=200) assert response.data["save_request_status"] == expected_request_status assert response.data["save_task_status"] == expected_task_status else: check_api_post_responses(api_client, url, data=None, status_code=403) def check_save_request_status( api_client, mocker, origin_url, expected_request_status, expected_task_status, scheduler_task_status="next_run_not_scheduled", scheduler_task_run_status=None, visit_date=None, visit_status=None, ): mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") mock_scheduler.get_tasks.return_value = [ { "priority": "high", "policy": "oneshot", "type": "load-git", "arguments": {"kwargs": {"repo_url": origin_url}, "args": []}, "status": scheduler_task_status, "id": 1, } ] if scheduler_task_run_status is None: mock_scheduler.get_task_runs.return_value = [] else: mock_scheduler.get_task_runs.return_value = [ { "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205", "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5), "id": 1, "metadata": {}, "scheduled": datetime.now(tz=timezone.utc), "started": None, "status": scheduler_task_run_status, "task": 1, } ] url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url} ) mock_visit_date = mocker.patch( ("swh.web.common.origin_save._get_visit_info_for_save_request") ) mock_visit_date.return_value = (visit_date, visit_status) response = check_api_get_responses(api_client, url, status_code=200) save_request_data = response.data[0] assert save_request_data["save_request_status"] == expected_request_status assert save_request_data["save_task_status"] == expected_task_status assert save_request_data["visit_status"] == visit_status # Check that save task status is still available when # the scheduler task has been archived mock_scheduler.get_tasks.return_value = [] response = check_api_get_responses(api_client, url, status_code=200) save_request_data = response.data[0] assert save_request_data["save_task_status"] == expected_task_status assert save_request_data["visit_status"] == visit_status def test_save_request_rejected(api_client, mocker): origin_url = "https://github.com/user/illegal_repo" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_REJECTED, ) check_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_REJECTED, expected_task_status=SAVE_TASK_NOT_CREATED, ) def test_save_request_pending(api_client, mocker): origin_url = "https://unkwownforge.com/user/repo" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_PENDING, expected_task_status=SAVE_TASK_NOT_CREATED, ) check_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_PENDING, expected_task_status=SAVE_TASK_NOT_CREATED, ) def test_save_request_succeed(api_client, mocker): origin_url = "https://github.com/Kitware/CMake" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) check_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SCHEDULED, scheduler_task_status="next_run_scheduled", scheduler_task_run_status="scheduled", ) check_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SUCCEEDED, scheduler_task_status="completed", scheduler_task_run_status="eventful", visit_date=None, ) visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1) check_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SUCCEEDED, scheduler_task_status="completed", scheduler_task_run_status="eventful", visit_date=visit_date, visit_status=VISIT_STATUS_FULL, ) def test_save_request_failed(api_client, mocker): origin_url = "https://gitlab.com/inkscape/inkscape" check_created_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) check_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SCHEDULED, scheduler_task_status="next_run_scheduled", scheduler_task_run_status="scheduled", ) check_save_request_status( api_client, mocker, origin_url, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_FAILED, scheduler_task_status="disabled", scheduler_task_run_status="failed", visit_status=VISIT_STATUS_FAILED, ) def test_create_save_request_only_when_needed(api_client, mocker): origin_url = "https://github.com/webpack/webpack" SaveOriginRequest.objects.create( visit_type="git", origin_url=origin_url, status=SAVE_REQUEST_ACCEPTED, loading_task_id=56, ) check_created_save_request_status( api_client, mocker, origin_url, scheduler_task_status="next_run_not_scheduled", expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) sors = list( SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) ) assert len(sors) == 1 check_created_save_request_status( api_client, mocker, origin_url, scheduler_task_status="next_run_scheduled", scheduler_task_run_status="scheduled", expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_SCHEDULED, ) sors = list( SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) ) assert len(sors) == 1 visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1) check_created_save_request_status( api_client, mocker, origin_url, scheduler_task_status="completed", expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, visit_date=visit_date, ) sors = list( SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) ) # check_api_post_responses sends two POST requests to check YAML and JSON response assert len(sors) == 3 check_created_save_request_status( api_client, mocker, origin_url, scheduler_task_status="disabled", expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) sors = list( SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url) ) assert len(sors) == 5 def test_get_save_requests_unknown_origin(api_client): unknown_origin_url = "https://gitlab.com/foo/bar" url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": unknown_origin_url}, ) response = check_api_get_responses(api_client, url, status_code=404) assert response.data == { "exception": "NotFoundExc", "reason": ( "No save requests found for visit of type git on origin with url %s." ) % unknown_origin_url, } _visit_type = "git" _origin_url = "https://github.com/python/cpython" def test_save_requests_rate_limit(api_client, mocker): create_save_origin_request = mocker.patch( "swh.web.api.views.origin_save.create_save_origin_request" ) def _save_request_dict(*args, **kwargs): return { "id": 1, "visit_type": _visit_type, "origin_url": _origin_url, "save_request_date": datetime.now().isoformat(), "save_request_status": SAVE_REQUEST_ACCEPTED, "save_task_status": SAVE_TASK_NOT_YET_SCHEDULED, "visit_date": None, "visit_status": None, } create_save_origin_request.side_effect = _save_request_dict url = reverse( "api-1-save-origin", url_args={"visit_type": _visit_type, "origin_url": _origin_url}, ) for _ in range(save_origin_rate_post): check_api_post_response(api_client, url, status_code=200) check_api_post_response(api_client, url, status_code=429) def test_save_request_form_server_error(api_client, mocker): create_save_origin_request = mocker.patch( "swh.web.api.views.origin_save.create_save_origin_request" ) create_save_origin_request.side_effect = Exception("Server error") url = reverse( "api-1-save-origin", url_args={"visit_type": _visit_type, "origin_url": _origin_url}, ) check_api_post_responses(api_client, url, status_code=500) @pytest.fixture def origin_to_review(): return "https://git.example.org/user/project" def test_create_save_request_pending_review_anonymous_user( api_client, origin_to_review ): url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_to_review}, ) response = check_api_post_responses(api_client, url, status_code=200) assert response.data["save_request_status"] == SAVE_REQUEST_PENDING with pytest.raises(ObjectDoesNotExist): SaveAuthorizedOrigin.objects.get(url=origin_to_review) def test_create_save_request_accepted_ambassador_user( api_client, origin_to_review, keycloak_oidc, mocker ): keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION] oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") check_created_save_request_status( api_client, mocker, origin_to_review, expected_request_status=SAVE_REQUEST_ACCEPTED, expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED, ) assert SaveAuthorizedOrigin.objects.get(url=origin_to_review) def test_create_save_request_anonymous_user_no_user_id(api_client): origin_url = "https://some.git.hosters/user/repo" url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}, ) check_api_post_responses(api_client, url, status_code=200) sor = SaveOriginRequest.objects.get(origin_url=origin_url) - assert sor.user_id is None + assert sor.user_ids is None def test_create_save_request_authenticated_user_id( api_client, origin_to_review, keycloak_oidc, mocker ): oidc_profile = keycloak_oidc.login() api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}") origin_url = "https://some.git.hosters/user/repo2" url = reverse( "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}, ) response = check_api_post_response(api_client, url, status_code=200) assert response.wsgi_request.user.id is not None user_id = str(response.wsgi_request.user.id) - sor = SaveOriginRequest.objects.get(user_id=user_id) - assert sor.user_id == user_id + sor = SaveOriginRequest.objects.get(user_ids=f'"{user_id}"') + assert sor.user_ids == f'"{user_id}"' + + +def test_create_pending_save_request_multiple_authenticated_users(api_client): + origin_url = "https://some.git.hosters/user/repo3" + first_user = User.objects.create_user(username="first_user", password="") + second_user = User.objects.create_user(username="second_user", password="") + url = reverse( + "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}, + ) + + api_client.force_login(first_user) + check_api_post_response(api_client, url, status_code=200) + + api_client.force_login(second_user) + check_api_post_response(api_client, url, status_code=200) + + assert SaveOriginRequest.objects.get(user_ids__contains=f'"{first_user.id}"') + assert SaveOriginRequest.objects.get(user_ids__contains=f'"{second_user.id}"') diff --git a/swh/web/tests/create_test_users.py b/swh/web/tests/create_test_users.py new file mode 100644 index 00000000..dfdd24d6 --- /dev/null +++ b/swh/web/tests/create_test_users.py @@ -0,0 +1,16 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +from django.contrib.auth import get_user_model + +User = get_user_model() + +username = "user" +password = "user" +email = "user@swh-web.org" + +if not User.objects.filter(username=username).exists(): + User.objects.create_user(username, email, password) diff --git a/swh/web/tests/misc/test_origin_save.py b/swh/web/tests/misc/test_origin_save.py index 399d594f..61222be6 100644 --- a/swh/web/tests/misc/test_origin_save.py +++ b/swh/web/tests/misc/test_origin_save.py @@ -1,96 +1,151 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime, timedelta, timezone import json import pytest +from swh.auth.django.utils import oidc_user_from_profile from swh.web.common.models import SaveOriginRequest from swh.web.common.origin_save import SAVE_REQUEST_ACCEPTED, SAVE_TASK_SUCCEEDED from swh.web.common.utils import reverse from swh.web.tests.utils import check_http_get_response def test_old_save_url_redirection(client): url = reverse("browse-origin-save") redirect_url = reverse("origin-save") resp = check_http_get_response(client, url, status_code=302) assert resp["location"] == redirect_url @pytest.mark.django_db -def test_save_origin_requests_list(client, mocker): +def test_save_origin_requests_list(client, mocker, keycloak_oidc): visit_types = ("git", "svn", "hg") nb_origins_per_type = 10 for visit_type in visit_types: for i in range(nb_origins_per_type): SaveOriginRequest.objects.create( request_date=datetime.now(tz=timezone.utc), visit_type=visit_type, origin_url=f"https://{visit_type}.example.org/project{i}", status=SAVE_REQUEST_ACCEPTED, visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), loading_task_id=i, loading_task_status=SAVE_TASK_SUCCEEDED, ) mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") mock_scheduler.get_tasks.return_value = [] mock_scheduler.get_task_runs.return_value = [] # retrieve all save requests in 3 pages, sorted in descending order # of request creation for i, visit_type in enumerate(reversed(visit_types)): url = reverse( "origin-save-requests-list", url_args={"status": "all"}, query_params={ "draw": i + 1, "search[value]": "", "order[0][column]": "0", "columns[0][name]": "request_date", "order[0][dir]": "desc", "length": nb_origins_per_type, "start": i * nb_origins_per_type, }, ) resp = check_http_get_response( client, url, status_code=200, content_type="application/json" ) sors = json.loads(resp.content.decode("utf-8")) assert sors["draw"] == i + 1 assert sors["recordsFiltered"] == len(visit_types) * nb_origins_per_type assert sors["recordsTotal"] == len(visit_types) * nb_origins_per_type assert len(sors["data"]) == nb_origins_per_type assert all(d["visit_type"] == visit_type for d in sors["data"]) # retrieve save requests filtered by visit type in a single page for i, visit_type in enumerate(reversed(visit_types)): url = reverse( "origin-save-requests-list", url_args={"status": "all"}, query_params={ "draw": i + 1, "search[value]": visit_type, "order[0][column]": "0", "columns[0][name]": "request_date", "order[0][dir]": "desc", "length": nb_origins_per_type, "start": 0, }, ) resp = check_http_get_response( client, url, status_code=200, content_type="application/json" ) sors = json.loads(resp.content.decode("utf-8")) assert sors["draw"] == i + 1 assert sors["recordsFiltered"] == nb_origins_per_type assert sors["recordsTotal"] == len(visit_types) * nb_origins_per_type assert len(sors["data"]) == nb_origins_per_type assert all(d["visit_type"] == visit_type for d in sors["data"]) + + +@pytest.mark.django_db +def test_save_origin_requests_list_user_filter(client, mocker, keycloak_oidc): + + # anonymous user created a save request + sor = SaveOriginRequest.objects.create( + request_date=datetime.now(tz=timezone.utc), + visit_type="svn", + origin_url="https://svn.example.org/user/project", + status=SAVE_REQUEST_ACCEPTED, + visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), + loading_task_id=1, + loading_task_status=SAVE_TASK_SUCCEEDED, + ) + + # authenticated user created a save request + user = oidc_user_from_profile(keycloak_oidc, keycloak_oidc.login()) + client.login(code="", code_verifier="", redirect_uri="") + + sor = SaveOriginRequest.objects.create( + request_date=datetime.now(tz=timezone.utc), + visit_type="git", + origin_url="https://git.example.org/user/project", + status=SAVE_REQUEST_ACCEPTED, + visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1), + loading_task_id=2, + loading_task_status=SAVE_TASK_SUCCEEDED, + user_ids=f'"{user.id}"', + ) + + # filter save requests according to user id + url = reverse( + "origin-save-requests-list", + url_args={"status": "all"}, + query_params={ + "draw": 1, + "search[value]": "", + "order[0][column]": "0", + "columns[0][name]": "request_date", + "order[0][dir]": "desc", + "length": 10, + "start": "0", + "user_requests_only": "1", + }, + ) + + resp = check_http_get_response( + client, url, status_code=200, content_type="application/json" + ) + sors = json.loads(resp.content.decode("utf-8")) + assert sors["recordsFiltered"] == 1 + assert sors["recordsTotal"] == 2 + assert sors["data"][0] == sor.to_dict() diff --git a/swh/web/tests/test_migrations.py b/swh/web/tests/test_migrations.py index d61a1d02..3cb68884 100644 --- a/swh/web/tests/test_migrations.py +++ b/swh/web/tests/test_migrations.py @@ -1,38 +1,53 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information APP_NAME = "swh_web_common" MIGRATION_0008 = "0008_save-code-now_indexes_20210106_1327" MIGRATION_0009 = "0009_saveoriginrequest_visit_status" MIGRATION_0010 = "0010_saveoriginrequest_user_id" +MIGRATION_0011 = "0011_saveoriginrequest_user_ids" def test_migrations_09_add_visit_status_to_sor_model(migrator): """Ensures the migration adds the visit_status field to SaveOriginRequest table""" old_state = migrator.apply_initial_migration((APP_NAME, MIGRATION_0008),) old_model = old_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(old_model, "visit_status") is False new_state = migrator.apply_tested_migration((APP_NAME, MIGRATION_0009)) new_model = new_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(new_model, "visit_status") is True def test_migrations_10_add_user_id_to_sor_model(migrator): """Ensures the migration adds the user_id field to SaveOriginRequest table""" old_state = migrator.apply_initial_migration((APP_NAME, MIGRATION_0009),) old_model = old_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(old_model, "user_id") is False new_state = migrator.apply_tested_migration((APP_NAME, MIGRATION_0010)) new_model = new_state.apps.get_model(APP_NAME, "SaveOriginRequest") assert hasattr(new_model, "user_id") is True + + +def test_migrations_11_add_user_ids_to_sor_model(migrator): + """Ensures the migration adds the user_id field to SaveOriginRequest table""" + + old_state = migrator.apply_initial_migration((APP_NAME, MIGRATION_0010),) + old_model = old_state.apps.get_model(APP_NAME, "SaveOriginRequest") + + assert hasattr(old_model, "user_ids") is False + + new_state = migrator.apply_tested_migration((APP_NAME, MIGRATION_0011)) + new_model = new_state.apps.get_model(APP_NAME, "SaveOriginRequest") + + assert hasattr(new_model, "user_ids") is True